Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -114,6 +114,10 @@ bool legalizeSDIV_SREM32(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + void legalizeUDIV_UREM64Impl(MachineIRBuilder &B, + Register DstReg, Register Num, Register Den, + bool IsRem) const; + bool legalizeUDIV_UREM64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeSDIV_SREM(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2674,15 +2674,14 @@ return {ResultLo.getReg(0), ResultHi.getReg(0)}; } -bool AMDGPULegalizerInfo::legalizeUDIV_UREM64(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { - const bool IsDiv = MI.getOpcode() == TargetOpcode::G_UDIV; +void AMDGPULegalizerInfo::legalizeUDIV_UREM64Impl(MachineIRBuilder &B, + Register DstReg, + Register Numer, + Register Denom, + bool IsRem) const { const LLT S32 = LLT::scalar(32); const LLT S64 = LLT::scalar(64); const LLT S1 = LLT::scalar(1); - Register Numer = MI.getOperand(1).getReg(); - Register Denom = MI.getOperand(2).getReg(); Register RcpLo, RcpHi; std::tie(RcpLo, RcpHi) = emitReciprocalU64(B, Denom); @@ -2775,63 +2774,81 @@ // endif C6 // endif C3 - if (IsDiv) { + if (!IsRem) { auto Sel1 = B.buildSelect( S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Add4, Add3); - B.buildSelect(MI.getOperand(0), + B.buildSelect(DstReg, B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel1, MulHi3); } else { auto Sel2 = B.buildSelect( S64, B.buildICmp(CmpInst::ICMP_NE, S1, C6, Zero32), Sub3, Sub2); - B.buildSelect(MI.getOperand(0), + B.buildSelect(DstReg, 
B.buildICmp(CmpInst::ICMP_NE, S1, C3, Zero32), Sel2, Sub1); } - - MI.eraseFromParent(); - return true; } bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - if (Ty == LLT::scalar(32)) - return legalizeUDIV_UREM32(MI, MRI, B); - if (Ty == LLT::scalar(64)) - return legalizeUDIV_UREM64(MI, MRI, B); - return false; + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + const bool IsRem = MI.getOpcode() == AMDGPU::G_UREM; + Register DstReg = MI.getOperand(0).getReg(); + Register Num = MI.getOperand(1).getReg(); + Register Den = MI.getOperand(2).getReg(); + LLT Ty = MRI.getType(DstReg); + + if (Ty == S32) + legalizeUDIV_UREM32Impl(B, DstReg, Num, Den, IsRem); + else if (Ty == S64) + legalizeUDIV_UREM64Impl(B, DstReg, Num, Den, IsRem); + else + return false; + + MI.eraseFromParent(); + return true; + } -bool AMDGPULegalizerInfo::legalizeSDIV_SREM32(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { +bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + const LLT S64 = LLT::scalar(64); const LLT S32 = LLT::scalar(32); - const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM; Register DstReg = MI.getOperand(0).getReg(); + const LLT Ty = MRI.getType(DstReg); + if (Ty != S32 && Ty != S64) + return false; + + const bool IsRem = MI.getOpcode() == AMDGPU::G_SREM; + Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); - auto ThirtyOne = B.buildConstant(S32, 31); - auto LHSign = B.buildAShr(S32, LHS, ThirtyOne); - auto RHSign = B.buildAShr(S32, RHS, ThirtyOne); + auto SignBitOffset = B.buildConstant(S32, Ty.getSizeInBits() - 1); + auto LHSign = B.buildAShr(Ty, LHS, SignBitOffset); + auto RHSign = B.buildAShr(Ty, RHS, SignBitOffset); - LHS = B.buildAdd(S32, LHS, LHSign).getReg(0); - RHS = B.buildAdd(S32, RHS, RHSign).getReg(0); 
+ LHS = B.buildAdd(Ty, LHS, LHSign).getReg(0); + RHS = B.buildAdd(Ty, RHS, RHSign).getReg(0); - LHS = B.buildXor(S32, LHS, LHSign).getReg(0); - RHS = B.buildXor(S32, RHS, RHSign).getReg(0); + LHS = B.buildXor(Ty, LHS, LHSign).getReg(0); + RHS = B.buildXor(Ty, RHS, RHSign).getReg(0); - Register UDivRem = MRI.createGenericVirtualRegister(S32); - legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsRem); + Register UDivRem = MRI.createGenericVirtualRegister(Ty); + if (Ty == S32) + legalizeUDIV_UREM32Impl(B, UDivRem, LHS, RHS, IsRem); + else + legalizeUDIV_UREM64Impl(B, UDivRem, LHS, RHS, IsRem); if (IsRem) { auto RSign = LHSign; // Remainder sign is the same as LHS - UDivRem = B.buildXor(S32, UDivRem, RSign).getReg(0); + UDivRem = B.buildXor(Ty, UDivRem, RSign).getReg(0); B.buildSub(DstReg, UDivRem, RSign); } else { - auto DSign = B.buildXor(S32, LHSign, RHSign); - UDivRem = B.buildXor(S32, UDivRem, DSign).getReg(0); + auto DSign = B.buildXor(Ty, LHSign, RHSign); + UDivRem = B.buildXor(Ty, UDivRem, DSign).getReg(0); B.buildSub(DstReg, UDivRem, DSign); } @@ -2839,14 +2856,6 @@ return true; } -bool AMDGPULegalizerInfo::legalizeSDIV_SREM(MachineInstr &MI, - MachineRegisterInfo &MRI, - MachineIRBuilder &B) const { - if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32)) - return legalizeSDIV_SREM32(MI, MRI, B); - return false; -} - bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -384,18 +384,528 @@ ; GFX6-LABEL: name: test_sdiv_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SDIV]](s64) + ; GFX6: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6: 
[[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: 
[[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], 
[[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; 
GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; 
GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), 
[[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX8-LABEL: name: test_sdiv_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SDIV]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), 
[[UADDE]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[XOR1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] 
+ ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO25]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO34]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; 
GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[UV26:%[0-9]+]]:_(s32), 
[[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX9-LABEL: name: test_sdiv_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SDIV]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[XOR1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: 
[[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; 
GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[XOR]](s64) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; 
GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; 
GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SDIV %0, %1 @@ -413,27 +923,1035 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6: 
[[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]] - ; GFX6: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], 
[[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = 
G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX6: 
[[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX6: 
[[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]] + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] + ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV27]], [[UADDO39]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), 
[[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] + ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV29]], [[UADDO41]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX6: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO9]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX6: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX6: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX6: 
[[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE14]](s32) + ; GFX6: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX6: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX6: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE16]](s32) + ; GFX6: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX6: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) + ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C8]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C9]] + ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C10]] + ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX6: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C11]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX6: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV46:%[0-9]+]]:_(s32), 
[[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] + ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO11]] + ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD21]] + ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; 
GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX6: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO57]] + ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO56]] + ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO56]] + ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE18]] + ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO56]] + ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[MUL24]] + ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD27]] + ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] + ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX6: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[ADD27]] + ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[MUL24]] + ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD27]] + ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX6: 
[[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD28]] + ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[ADD27]] + ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX6: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX6: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO57]] + ; GFX6: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDO69]] + ; GFX6: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX6: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] + ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE22]] + ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] + ; GFX6: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX6: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE22]] + ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] + ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE22]] + ; GFX6: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX6: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO77]](s1) + ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD32]] + ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) + ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE22]] + ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX6: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD35]](s32) + ; GFX6: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] + ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] + ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD35]] + ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] + ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX6: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] + ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD37]], [[USUBO13]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD37]] + ; GFX6: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], 
[[UV55]], [[USUBO13]] + ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX6: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] + ; GFX6: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV57]], [[UADDO81]] + ; GFX6: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE24]](s32) + ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV55]] + ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV54]] + ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV55]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX6: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] + ; GFX6: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[UV59]], [[UADDO83]] + ; GFX6: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE26]](s32) + ; GFX6: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV54]] + ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV55]], [[USUBO15]] + ; GFX6: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX6: 
[[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX6: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX6: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX6: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX6: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] + ; GFX6: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO19]] + ; GFX6: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: test_sdiv_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]] - ; GFX8: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[UV8:%[0-9]+]]:_(s32), 
[[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), 
[[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD 
[[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: 
[[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX8: 
[[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]] + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[UV26:%[0-9]+]]:_(s32), 
[[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] + ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV27]], [[UADDO39]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] + ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV29]], [[UADDO41]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO9]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX8: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX8: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX8: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX8: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE14]](s32) + ; GFX8: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX8: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX8: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE16]](s32) + ; GFX8: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX8: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) + ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C8]] + ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], 
[[UITOFP2]] + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C9]] + ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C10]] + ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX8: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C11]] + ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX8: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] + ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO11]] + ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), 
[[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD21]] + ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX8: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO57]] + ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO56]] + ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO56]] + ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE18]] + ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO56]] + ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[MUL24]] + ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD27]] + ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH 
[[UADDO56]], [[MUL24]] + ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[ADD27]] + ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[MUL24]] + ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD27]] + ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD28]] + ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[ADD27]] + ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX8: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX8: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO57]] + ; GFX8: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDO69]] + ; GFX8: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] + ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE22]] + ; GFX8: 
[[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] + ; GFX8: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX8: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE22]] + ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] + ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE22]] + ; GFX8: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX8: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) + ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD32]] + ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) + ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE22]] + ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX8: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD35]](s32) + ; GFX8: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] + ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] + ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD35]] + ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] + ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), 
[[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] + ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD37]], [[USUBO13]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD37]] + ; GFX8: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO13]] + ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX8: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] + ; GFX8: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV57]], [[UADDO81]] + ; GFX8: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE24]](s32) + ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV55]] + ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV54]] + ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV55]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX8: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[C7]](s64) + ; GFX8: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] + ; GFX8: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[UV59]], [[UADDO83]] + ; GFX8: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE26]](s32) + ; GFX8: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV54]] + ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV55]], [[USUBO15]] + ; GFX8: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX8: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] + ; GFX8: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX8: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX8: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX8: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] + ; GFX8: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO19]] + ; GFX8: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_sdiv_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX9: 
[[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[UV]], [[UV2]] - ; GFX9: [[SDIV1:%[0-9]+]]:_(s64) = G_SDIV [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SDIV]](s64), [[SDIV1]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; 
GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; 
GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: 
[[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: 
[[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]] + ; GFX9: 
[[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]] + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV26]] + ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV27]], [[UADDO39]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV28]] + ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), 
[[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV29]], [[UADDO41]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV30]], [[UV32]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV31]], [[UV33]], [[USUBO9]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX9: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX9: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX9: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO43]] + ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO42]](s32), [[UADDE14]](s32) + ; GFX9: 
[[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UV38]], [[UV40]] + ; GFX9: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[UV39]], [[UV41]], [[UADDO45]] + ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO44]](s32), [[UADDE16]](s32) + ; GFX9: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX9: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV42]](s32) + ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV43]](s32) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C8]] + ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C9]] + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C10]] + ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX9: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C11]] + ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX9: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9: 
[[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[UV46]] + ; GFX9: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[UV47]], [[USUBO11]] + ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH16]] + ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[UADDO50]], [[UMULH18]] + ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO53]](s1) + ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[ADD21]] + ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; 
GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX9: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO54]] + ; GFX9: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO57]] + ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO56]] + ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO56]] + ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE18]] + ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO56]] + ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[MUL24]] + ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO56]], [[ADD27]] + ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[MUL24]] + ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH21]] + ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE18]], [[ADD27]] + ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[MUL24]] + ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO56]], [[ADD27]] + ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO62]], [[UMULH23]] + ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO65]](s1) + ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9: 
[[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[UADDO64]], [[ADD28]] + ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE18]], [[ADD27]] + ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX9: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO56]], [[UADDO66]] + ; GFX9: [[UADDE20:%[0-9]+]]:_(s32), [[UADDE21:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO57]] + ; GFX9: [[UADDE22:%[0-9]+]]:_(s32), [[UADDE23:%[0-9]+]]:_(s1) = G_UADDE [[UADDE20]], [[C6]], [[UADDO69]] + ; GFX9: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDO68]] + ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV50]], [[UADDE22]] + ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDO68]] + ; GFX9: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX9: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH25]] + ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV51]], [[UADDE22]] + ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDO68]] + ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV50]], [[UADDE22]] + ; GFX9: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX9: [[UADDO76:%[0-9]+]]:_(s32), [[UADDO77:%[0-9]+]]:_(s1) = G_UADDO [[UADDO74]], [[UMULH27]] + ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO77]](s1) + ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT27]], [[ZEXT28]] + ; GFX9: [[UADDO78:%[0-9]+]]:_(s32), [[UADDO79:%[0-9]+]]:_(s1) = G_UADDO [[UADDO76]], [[ADD32]] + ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO79]](s1) + ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV51]], [[UADDE22]] + ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO78]](s32), [[ADD35]](s32) + ; GFX9: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[UADDO78]] + ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV53]], [[UADDO78]] + ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV52]], [[ADD35]] + ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV52]], [[UADDO78]] + ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV48]], [[MUL33]] + ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV49]], [[ADD37]], [[USUBO13]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV49]], [[ADD37]] + ; GFX9: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV55]] + ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV54]] + ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV55]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV54]] + ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV55]], [[USUBO13]] + ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), 
[[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX9: [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO80:%[0-9]+]]:_(s32), [[UADDO81:%[0-9]+]]:_(s1) = G_UADDO [[UADDO78]], [[UV56]] + ; GFX9: [[UADDE24:%[0-9]+]]:_(s32), [[UADDE25:%[0-9]+]]:_(s1) = G_UADDE [[ADD35]], [[UV57]], [[UADDO81]] + ; GFX9: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO80]](s32), [[UADDE24]](s32) + ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV55]] + ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV54]] + ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV55]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX9: [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO82:%[0-9]+]]:_(s32), [[UADDO83:%[0-9]+]]:_(s1) = G_UADDO [[UADDO80]], [[UV58]] + ; GFX9: [[UADDE26:%[0-9]+]]:_(s32), [[UADDE27:%[0-9]+]]:_(s1) = G_UADDE [[UADDE24]], [[UV59]], [[UADDO83]] + ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO82]](s32), [[UADDE26]](s32) + ; GFX9: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV54]] + ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV55]], [[USUBO15]] + ; GFX9: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX9: [[XOR6:%[0-9]+]]:_(s64) = G_XOR [[ASHR2]], [[ASHR3]] + ; 
GFX9: [[XOR7:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[XOR6]] + ; GFX9: [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR7]](s64) + ; GFX9: [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR6]](s64) + ; GFX9: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV60]], [[UV62]] + ; GFX9: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV61]], [[UV63]], [[USUBO19]] + ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 @@ -1189,36 +2707,543 @@ ; GFX6-LABEL: name: test_sdiv_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) - ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 + ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), 
[[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI 
[[FADD1]](s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD 
[[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), 
[[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO33]](s1) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), 
[[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX6: 
[[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64) + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; GFX8-LABEL: name: test_sdiv_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) - ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 + ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; 
GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: 
[[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; 
GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: 
[[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: 
[[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; 
GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), 
[[MV4]], [[MV3]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64) + ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; GFX9-LABEL: name: test_sdiv_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) - ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 + ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[SEXT_INREG]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], 
[[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO11]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO21]](s1) + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], 
[[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO36]](s32), [[ADD16]](s32) + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), 
[[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UADDO36]], [[UV22]] + ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[ADD16]], [[UV23]], [[UADDO39]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64) + ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UADDO38]], [[UV24]] + ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UADDE10]], [[UV25]], [[UADDO41]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = 
G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[ASHR]], [[ASHR1]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[XOR2]] + ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64) + ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -369,18 +369,504 @@ ; GFX6-LABEL: name: test_srem_s64 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SREM]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) 
= G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: 
[[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: 
[[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: 
[[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6: 
[[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX8-LABEL: name: test_srem_s64 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; 
GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SREM]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; 
GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: 
[[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO19]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], 
[[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + 
; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX8: 
[[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[MV5]](s64) ; GFX9-LABEL: name: test_srem_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SREM]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[COPY1]], [[C]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], 
[[ASHR1]] + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] 
+ ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], 
[[UADDO14]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; 
GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX9: 
[[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX9: 
[[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[MV5]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = G_SREM %0, %1 @@ -398,27 +884,990 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[UV]], [[UV2]] - ; GFX6: [[SREM1:%[0-9]+]]:_(s64) = G_SREM [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SREM]](s64), [[SREM1]](s64) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[UV]](s64) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX6: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD 
[[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) 
= G_ZEXT [[UADDO11]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO21]](s1) + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX6: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], 
[[UV29]], [[USUBO9]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX6: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX6: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX6: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX6: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX6: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX6: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX6: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX6: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX6: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX6: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX6: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX6: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX6: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX6: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX6: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) + ; GFX6: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX6: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 
0x3DF0000000000000 + ; GFX6: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]] + ; GFX6: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX6: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX6: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX6: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX6: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX6: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX6: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] + ; GFX6: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] + ; GFX6: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX6: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX6: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX6: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX6: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX6: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX6: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX6: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX6: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX6: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX6: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX6: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX6: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX6: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX6: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI3]], [[MUL18]] + ; GFX6: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX6: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX6: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX6: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX6: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX6: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX6: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD21]] + ; GFX6: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX6: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX6: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX6: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX6: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX6: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO53]] + ; GFX6: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX6: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] + ; GFX6: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX6: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE14]] + ; GFX6: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] + ; GFX6: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX6: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX6: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[MUL24]] + ; GFX6: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD27]] + ; GFX6: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] + ; GFX6: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX6: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX6: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX6: 
[[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX6: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX6: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[ADD27]] + ; GFX6: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[MUL24]] + ; GFX6: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD27]] + ; GFX6: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX6: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX6: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX6: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX6: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX6: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD28]] + ; GFX6: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX6: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX6: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[ADD27]] + ; GFX6: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX6: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX6: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO53]] + ; GFX6: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[C6]], [[UADDO65]] + ; GFX6: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX6: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX6: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] + ; GFX6: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE18]] + ; GFX6: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] + ; GFX6: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX6: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX6: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO66]], [[UMULH25]] + ; GFX6: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX6: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX6: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE18]] + ; GFX6: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] + ; GFX6: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE18]] + ; GFX6: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX6: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX6: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX6: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX6: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX6: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD32]] + ; GFX6: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX6: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX6: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE18]] + ; GFX6: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX6: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX6: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] + ; GFX6: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] + ; GFX6: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD35]] + ; GFX6: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] + ; GFX6: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX6: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX6: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] + ; GFX6: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD37]], [[USUBO13]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD37]] + ; GFX6: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) + ; GFX6: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX6: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX6: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX6: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX6: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX6: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX6: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] + ; GFX6: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX6: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX6: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) + ; GFX6: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX6: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX6: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX6: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX6: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX6: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] + ; GFX6: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX6: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX6: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) + ; GFX6: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX6: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX6: 
[[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX6: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX6: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX6: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] + ; GFX6: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX6: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: test_srem_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[UV]], [[UV2]] - ; GFX8: [[SREM1:%[0-9]+]]:_(s64) = G_SREM [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SREM]](s64), [[SREM1]](s64) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: 
[[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: 
[[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO25]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO 
[[UADDO34]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[UADDO36]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]] + ; GFX8: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[USUBO4]](s32), [[USUBE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX8: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX8: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX8: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX8: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX8: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) 
= G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX8: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX8: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX8: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX8: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX8: [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX8: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX8: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX8: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX8: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX8: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) + ; GFX8: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX8: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]] + ; GFX8: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX8: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX8: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX8: 
[[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX8: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX8: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] + ; GFX8: [[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] + ; GFX8: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX8: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX8: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX8: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX8: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX8: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX8: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX8: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX8: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX8: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX8: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX8: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX8: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX8: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX8: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX8: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX8: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX8: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX8: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX8: [[ZEXT18:%[0-9]+]]:_(s32) = 
G_ZEXT [[UADDO49]](s1) + ; GFX8: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX8: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD21]] + ; GFX8: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX8: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX8: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX8: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX8: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX8: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO53]] + ; GFX8: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX8: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] + ; GFX8: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX8: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE14]] + ; GFX8: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] + ; GFX8: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX8: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX8: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[MUL24]] + ; GFX8: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD27]] + ; GFX8: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] + ; GFX8: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX8: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX8: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX8: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX8: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX8: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[ADD27]] + ; GFX8: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[MUL24]] + ; GFX8: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD27]] + ; GFX8: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO 
[[MUL29]], [[UMULH22]] + ; GFX8: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX8: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX8: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX8: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX8: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], [[ADD28]] + ; GFX8: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX8: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX8: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[ADD27]] + ; GFX8: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX8: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX8: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO53]] + ; GFX8: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[C6]], [[UADDO65]] + ; GFX8: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX8: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] + ; GFX8: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE18]] + ; GFX8: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] + ; GFX8: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX8: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX8: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX8: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX8: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX8: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE18]] + ; GFX8: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] + ; GFX8: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE18]] + ; GFX8: [[UADDO70:%[0-9]+]]:_(s32), 
[[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX8: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX8: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX8: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX8: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX8: [[UADDO74:%[0-9]+]]:_(s32), [[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD32]] + ; GFX8: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX8: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX8: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE18]] + ; GFX8: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX8: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] + ; GFX8: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] + ; GFX8: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD35]] + ; GFX8: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] + ; GFX8: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX8: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX8: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] + ; GFX8: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD37]], [[USUBO13]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD37]] + ; GFX8: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) + ; GFX8: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX8: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX8: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX8: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX8: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX8: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; 
GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX8: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] + ; GFX8: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX8: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], [[USUBO15]] + ; GFX8: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) + ; GFX8: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX8: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX8: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX8: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX8: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX8: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] + ; GFX8: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX8: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX8: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) + ; GFX8: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX8: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX8: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX8: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX8: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], 
[[UV54]] + ; GFX8: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX8: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: test_srem_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) - ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[UV]], [[UV2]] - ; GFX9: [[SREM1:%[0-9]+]]:_(s64) = G_SREM [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SREM]](s64), [[SREM1]](s64) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV2]], [[C]](s32) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV2]](s64) + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV8]], [[UV10]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV9]], [[UV11]], [[UADDO3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV12]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV13]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[UV16]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[UV17]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = 
G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = 
G_MUL [[USUBE]], [[UADDO14]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = 
G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDO26]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV20]], [[UADDE8]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDO26]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV21]], [[UADDE8]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDO26]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV20]], [[UADDE8]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV21]], [[UADDE8]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV22]], 
[[UADDO36]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV23]], [[UADDO36]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV22]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV22]], [[UADDO36]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV18]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV19]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV19]], [[ADD18]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV25]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV24]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV25]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV24]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV25]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV25]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV24]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV25]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = 
G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV24]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV25]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9: [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV26]], [[UV28]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV27]], [[UV29]], [[USUBO9]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX9: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C]](s32) + ; GFX9: [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) + ; GFX9: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX9: [[UADDO38:%[0-9]+]]:_(s32), [[UADDO39:%[0-9]+]]:_(s1) = G_UADDO [[UV30]], [[UV32]] + ; GFX9: [[UADDE10:%[0-9]+]]:_(s32), [[UADDE11:%[0-9]+]]:_(s1) = G_UADDE [[UV31]], [[UV33]], [[UADDO39]] + ; GFX9: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO38]](s32), [[UADDE10]](s32) + ; GFX9: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV3]](s64) + ; GFX9: 
[[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR3]](s64) + ; GFX9: [[UADDO40:%[0-9]+]]:_(s32), [[UADDO41:%[0-9]+]]:_(s1) = G_UADDO [[UV34]], [[UV36]] + ; GFX9: [[UADDE12:%[0-9]+]]:_(s32), [[UADDE13:%[0-9]+]]:_(s1) = G_UADDE [[UV35]], [[UV37]], [[UADDO41]] + ; GFX9: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO40]](s32), [[UADDE12]](s32) + ; GFX9: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[MV6]], [[ASHR2]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[MV7]], [[ASHR3]] + ; GFX9: [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9: [[UITOFP2:%[0-9]+]]:_(s32) = G_UITOFP [[UV38]](s32) + ; GFX9: [[UITOFP3:%[0-9]+]]:_(s32) = G_UITOFP [[UV39]](s32) + ; GFX9: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL4:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP3]], [[C7]] + ; GFX9: [[FADD2:%[0-9]+]]:_(s32) = G_FADD [[FMUL4]], [[UITOFP2]] + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD2]](s32) + ; GFX9: [[C8:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL5:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C8]] + ; GFX9: [[C9:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL6:%[0-9]+]]:_(s32) = G_FMUL [[FMUL5]], [[C9]] + ; GFX9: [[INTRINSIC_TRUNC1:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL6]] + ; GFX9: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL7:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC1]], [[C10]] + ; GFX9: [[FADD3:%[0-9]+]]:_(s32) = G_FADD [[FMUL7]], [[FMUL5]] + ; GFX9: [[FPTOUI2:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD3]](s32) + ; GFX9: [[FPTOUI3:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC1]](s32) + ; GFX9: [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9: [[USUBO10:%[0-9]+]]:_(s32), [[USUBO11:%[0-9]+]]:_(s1) = G_USUBO [[UV40]], [[UV42]] + ; GFX9: 
[[USUBE14:%[0-9]+]]:_(s32), [[USUBE15:%[0-9]+]]:_(s1) = G_USUBE [[UV41]], [[UV43]], [[USUBO11]] + ; GFX9: [[MUL18:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI2]] + ; GFX9: [[MUL19:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[FPTOUI2]] + ; GFX9: [[MUL20:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[FPTOUI3]] + ; GFX9: [[UMULH15:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[FPTOUI2]] + ; GFX9: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[MUL19]], [[MUL20]] + ; GFX9: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[ADD19]], [[UMULH15]] + ; GFX9: [[MUL21:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[MUL18]] + ; GFX9: [[MUL22:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UMULH16:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[MUL18]] + ; GFX9: [[UADDO42:%[0-9]+]]:_(s32), [[UADDO43:%[0-9]+]]:_(s1) = G_UADDO [[MUL21]], [[MUL22]] + ; GFX9: [[ZEXT15:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO43]](s1) + ; GFX9: [[UADDO44:%[0-9]+]]:_(s32), [[UADDO45:%[0-9]+]]:_(s1) = G_UADDO [[UADDO42]], [[UMULH16]] + ; GFX9: [[ZEXT16:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO45]](s1) + ; GFX9: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[ZEXT15]], [[ZEXT16]] + ; GFX9: [[MUL23:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI3]], [[ADD20]] + ; GFX9: [[UMULH17:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[MUL18]] + ; GFX9: [[UMULH18:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI2]], [[ADD20]] + ; GFX9: [[UADDO46:%[0-9]+]]:_(s32), [[UADDO47:%[0-9]+]]:_(s1) = G_UADDO [[MUL23]], [[UMULH17]] + ; GFX9: [[ZEXT17:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO47]](s1) + ; GFX9: [[UADDO48:%[0-9]+]]:_(s32), [[UADDO49:%[0-9]+]]:_(s1) = G_UADDO [[UADDO46]], [[UMULH18]] + ; GFX9: [[ZEXT18:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO49]](s1) + ; GFX9: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[ZEXT17]], [[ZEXT18]] + ; GFX9: [[UADDO50:%[0-9]+]]:_(s32), [[UADDO51:%[0-9]+]]:_(s1) = G_UADDO [[UADDO48]], [[ADD21]] + ; GFX9: [[ZEXT19:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO51]](s1) + ; GFX9: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[ADD22]], [[ZEXT19]] + ; GFX9: [[UMULH19:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI3]], [[ADD20]] + ; GFX9: 
[[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UMULH19]], [[ADD23]] + ; GFX9: [[UADDO52:%[0-9]+]]:_(s32), [[UADDO53:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI2]], [[UADDO50]] + ; GFX9: [[UADDE14:%[0-9]+]]:_(s32), [[UADDE15:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI3]], [[ADD24]], [[UADDO53]] + ; GFX9: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI3]], [[ADD24]] + ; GFX9: [[MUL24:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDO52]] + ; GFX9: [[MUL25:%[0-9]+]]:_(s32) = G_MUL [[USUBE14]], [[UADDO52]] + ; GFX9: [[MUL26:%[0-9]+]]:_(s32) = G_MUL [[USUBO10]], [[UADDE14]] + ; GFX9: [[UMULH20:%[0-9]+]]:_(s32) = G_UMULH [[USUBO10]], [[UADDO52]] + ; GFX9: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[MUL25]], [[MUL26]] + ; GFX9: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[ADD26]], [[UMULH20]] + ; GFX9: [[MUL27:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[MUL24]] + ; GFX9: [[MUL28:%[0-9]+]]:_(s32) = G_MUL [[UADDO52]], [[ADD27]] + ; GFX9: [[UMULH21:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[MUL24]] + ; GFX9: [[UADDO54:%[0-9]+]]:_(s32), [[UADDO55:%[0-9]+]]:_(s1) = G_UADDO [[MUL27]], [[MUL28]] + ; GFX9: [[ZEXT20:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO55]](s1) + ; GFX9: [[UADDO56:%[0-9]+]]:_(s32), [[UADDO57:%[0-9]+]]:_(s1) = G_UADDO [[UADDO54]], [[UMULH21]] + ; GFX9: [[ZEXT21:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO57]](s1) + ; GFX9: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[ZEXT20]], [[ZEXT21]] + ; GFX9: [[MUL29:%[0-9]+]]:_(s32) = G_MUL [[UADDE14]], [[ADD27]] + ; GFX9: [[UMULH22:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[MUL24]] + ; GFX9: [[UMULH23:%[0-9]+]]:_(s32) = G_UMULH [[UADDO52]], [[ADD27]] + ; GFX9: [[UADDO58:%[0-9]+]]:_(s32), [[UADDO59:%[0-9]+]]:_(s1) = G_UADDO [[MUL29]], [[UMULH22]] + ; GFX9: [[ZEXT22:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO59]](s1) + ; GFX9: [[UADDO60:%[0-9]+]]:_(s32), [[UADDO61:%[0-9]+]]:_(s1) = G_UADDO [[UADDO58]], [[UMULH23]] + ; GFX9: [[ZEXT23:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO61]](s1) + ; GFX9: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[ZEXT22]], [[ZEXT23]] + ; GFX9: [[UADDO62:%[0-9]+]]:_(s32), [[UADDO63:%[0-9]+]]:_(s1) = G_UADDO [[UADDO60]], 
[[ADD28]] + ; GFX9: [[ZEXT24:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO63]](s1) + ; GFX9: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[ADD29]], [[ZEXT24]] + ; GFX9: [[UMULH24:%[0-9]+]]:_(s32) = G_UMULH [[UADDE14]], [[ADD27]] + ; GFX9: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UMULH24]], [[ADD30]] + ; GFX9: [[UADDO64:%[0-9]+]]:_(s32), [[UADDO65:%[0-9]+]]:_(s1) = G_UADDO [[UADDO52]], [[UADDO62]] + ; GFX9: [[UADDE16:%[0-9]+]]:_(s32), [[UADDE17:%[0-9]+]]:_(s1) = G_UADDE [[ADD25]], [[ADD31]], [[UADDO53]] + ; GFX9: [[UADDE18:%[0-9]+]]:_(s32), [[UADDE19:%[0-9]+]]:_(s1) = G_UADDE [[UADDE16]], [[C6]], [[UADDO65]] + ; GFX9: [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9: [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR3]](s64) + ; GFX9: [[MUL30:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDO64]] + ; GFX9: [[MUL31:%[0-9]+]]:_(s32) = G_MUL [[UV46]], [[UADDE18]] + ; GFX9: [[UMULH25:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDO64]] + ; GFX9: [[UADDO66:%[0-9]+]]:_(s32), [[UADDO67:%[0-9]+]]:_(s1) = G_UADDO [[MUL30]], [[MUL31]] + ; GFX9: [[ZEXT25:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO67]](s1) + ; GFX9: [[UADDO68:%[0-9]+]]:_(s32), [[UADDO69:%[0-9]+]]:_(s1) = G_UADDO [[UADDO66]], [[UMULH25]] + ; GFX9: [[ZEXT26:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO69]](s1) + ; GFX9: [[ADD32:%[0-9]+]]:_(s32) = G_ADD [[ZEXT25]], [[ZEXT26]] + ; GFX9: [[MUL32:%[0-9]+]]:_(s32) = G_MUL [[UV47]], [[UADDE18]] + ; GFX9: [[UMULH26:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDO64]] + ; GFX9: [[UMULH27:%[0-9]+]]:_(s32) = G_UMULH [[UV46]], [[UADDE18]] + ; GFX9: [[UADDO70:%[0-9]+]]:_(s32), [[UADDO71:%[0-9]+]]:_(s1) = G_UADDO [[MUL32]], [[UMULH26]] + ; GFX9: [[ZEXT27:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO71]](s1) + ; GFX9: [[UADDO72:%[0-9]+]]:_(s32), [[UADDO73:%[0-9]+]]:_(s1) = G_UADDO [[UADDO70]], [[UMULH27]] + ; GFX9: [[ZEXT28:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO73]](s1) + ; GFX9: [[ADD33:%[0-9]+]]:_(s32) = G_ADD [[ZEXT27]], [[ZEXT28]] + ; GFX9: [[UADDO74:%[0-9]+]]:_(s32), 
[[UADDO75:%[0-9]+]]:_(s1) = G_UADDO [[UADDO72]], [[ADD32]] + ; GFX9: [[ZEXT29:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO75]](s1) + ; GFX9: [[ADD34:%[0-9]+]]:_(s32) = G_ADD [[ADD33]], [[ZEXT29]] + ; GFX9: [[UMULH28:%[0-9]+]]:_(s32) = G_UMULH [[UV47]], [[UADDE18]] + ; GFX9: [[ADD35:%[0-9]+]]:_(s32) = G_ADD [[UMULH28]], [[ADD34]] + ; GFX9: [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9: [[MUL33:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[UADDO74]] + ; GFX9: [[MUL34:%[0-9]+]]:_(s32) = G_MUL [[UV49]], [[UADDO74]] + ; GFX9: [[MUL35:%[0-9]+]]:_(s32) = G_MUL [[UV48]], [[ADD35]] + ; GFX9: [[UMULH29:%[0-9]+]]:_(s32) = G_UMULH [[UV48]], [[UADDO74]] + ; GFX9: [[ADD36:%[0-9]+]]:_(s32) = G_ADD [[MUL34]], [[MUL35]] + ; GFX9: [[ADD37:%[0-9]+]]:_(s32) = G_ADD [[ADD36]], [[UMULH29]] + ; GFX9: [[USUBO12:%[0-9]+]]:_(s32), [[USUBO13:%[0-9]+]]:_(s1) = G_USUBO [[UV44]], [[MUL33]] + ; GFX9: [[USUBE16:%[0-9]+]]:_(s32), [[USUBE17:%[0-9]+]]:_(s1) = G_USUBE [[UV45]], [[ADD37]], [[USUBO13]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV45]], [[ADD37]] + ; GFX9: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO12]](s32), [[USUBE16]](s32) + ; GFX9: [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR4]](s64) + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE16]](s32), [[UV51]] + ; GFX9: [[SEXT4:%[0-9]+]]:_(s32) = G_SEXT [[ICMP8]](s1) + ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO12]](s32), [[UV50]] + ; GFX9: [[SEXT5:%[0-9]+]]:_(s32) = G_SEXT [[ICMP9]](s1) + ; GFX9: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE16]](s32), [[UV51]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP10]](s1), [[SEXT5]], [[SEXT4]] + ; GFX9: [[USUBO14:%[0-9]+]]:_(s32), [[USUBO15:%[0-9]+]]:_(s1) = G_USUBO [[USUBO12]], [[UV50]] + ; GFX9: [[USUBE18:%[0-9]+]]:_(s32), [[USUBE19:%[0-9]+]]:_(s1) = G_USUBE [[SUB1]], [[UV51]], [[USUBO13]] + ; GFX9: [[USUBE20:%[0-9]+]]:_(s32), [[USUBE21:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[C6]], 
[[USUBO15]] + ; GFX9: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO14]](s32), [[USUBE20]](s32) + ; GFX9: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE20]](s32), [[UV51]] + ; GFX9: [[SEXT6:%[0-9]+]]:_(s32) = G_SEXT [[ICMP11]](s1) + ; GFX9: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO14]](s32), [[UV50]] + ; GFX9: [[SEXT7:%[0-9]+]]:_(s32) = G_SEXT [[ICMP12]](s1) + ; GFX9: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE20]](s32), [[UV51]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP13]](s1), [[SEXT7]], [[SEXT6]] + ; GFX9: [[USUBO16:%[0-9]+]]:_(s32), [[USUBO17:%[0-9]+]]:_(s1) = G_USUBO [[USUBO14]], [[UV50]] + ; GFX9: [[USUBE22:%[0-9]+]]:_(s32), [[USUBE23:%[0-9]+]]:_(s1) = G_USUBE [[USUBE18]], [[UV51]], [[USUBO15]] + ; GFX9: [[USUBE24:%[0-9]+]]:_(s32), [[USUBE25:%[0-9]+]]:_(s1) = G_USUBE [[USUBE22]], [[C6]], [[USUBO17]] + ; GFX9: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO16]](s32), [[USUBE24]](s32) + ; GFX9: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT5]](s32), [[C6]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP14]](s1), [[MV10]], [[MV9]] + ; GFX9: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT4]](s32), [[C6]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP15]](s1), [[SELECT6]], [[MV8]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s64) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX9: [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR5]](s64) + ; GFX9: [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR2]](s64) + ; GFX9: [[USUBO18:%[0-9]+]]:_(s32), [[USUBO19:%[0-9]+]]:_(s1) = G_USUBO [[UV52]], [[UV54]] + ; GFX9: [[USUBE26:%[0-9]+]]:_(s32), [[USUBE27:%[0-9]+]]:_(s1) = G_USUBE [[UV53]], [[UV55]], [[USUBO19]] + ; GFX9: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO18]](s32), [[USUBE26]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV5]](s64), [[MV11]](s64) ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = 
COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 @@ -1153,36 +2602,519 @@ ; GFX6-LABEL: name: test_srem_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) - ; GFX6: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) - ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 + ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX6: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX6: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX6: 
[[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX6: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX6: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX6: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX6: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX6: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX6: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX6: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; 
GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX6: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX6: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX6: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX6: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX6: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX6: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX6: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX6: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX6: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX6: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; 
GFX6: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX6: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX6: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX6: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX6: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX6: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX6: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX6: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX6: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX6: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX6: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX6: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX6: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX6: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX6: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX6: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX6: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX6: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX6: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX6: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX6: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX6: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX6: 
[[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX6: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX6: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX6: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX6: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX6: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX6: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX6: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX6: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX6: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX6: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX6: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX6: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX6: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX6: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX6: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX6: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX6: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX6: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX6: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX6: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX6: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX6: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX6: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX6: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; 
GFX6: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX6: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX6: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX6: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX6: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX6: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX6: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX6: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX6: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX6: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX6: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX6: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX6: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX6: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX6: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), 
[[UV21]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX6: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX6: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX6: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX6: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX6: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX6: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX6: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX6: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX6: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64) + ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; GFX8-LABEL: name: test_srem_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) - ; GFX8: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) - ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; 
GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 + ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX8: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX8: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX8: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX8: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX8: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG 
[[FADD]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX8: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX8: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX8: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX8: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX8: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX8: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO7]](s1) + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX8: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX8: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX8: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX8: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; GFX8: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX8: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX8: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX8: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX8: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX8: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX8: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX8: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX8: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX8: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX8: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX8: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT 
[[UADDO17]](s1) + ; GFX8: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX8: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX8: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX8: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX8: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX8: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX8: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX8: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX8: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX8: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX8: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX8: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX8: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX8: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX8: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX8: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX8: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX8: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX8: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX8: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX8: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX8: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] 
+ ; GFX8: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX8: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX8: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX8: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX8: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX8: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX8: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX8: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX8: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX8: [[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX8: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX8: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX8: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX8: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX8: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX8: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX8: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX8: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX8: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX8: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX8: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX8: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX8: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX8: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX8: 
[[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX8: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX8: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = G_USUBO [[USUBO2]], [[UV20]] + ; GFX8: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX8: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX8: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX8: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX8: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX8: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX8: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX8: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX8: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX8: 
[[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX8: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX8: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX8: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), [[USUBE12]](s32) + ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64) + ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) ; GFX9-LABEL: name: test_srem_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) - ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) - ; GFX9: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) - ; GFX9: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) - ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]] - ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) - ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) + ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 + ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) + ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG]], [[C]](s32) + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SEXT_INREG1]], [[C]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG]](s64) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX9: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT_INREG1]](s64) + ; GFX9: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR1]](s64) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UV4]], [[UV6]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV5]], [[UV7]], [[UADDO3]] + ; GFX9: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO2]](s32), [[UADDE2]](s32) + ; GFX9: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX9: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[MV1]], [[ASHR1]] + ; GFX9: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV8]](s32) + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV9]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[UITOFP1]], [[C1]] + ; GFX9: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UITOFP]] + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[FADD]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x43EFFFFF80000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x3DF0000000000000 + ; GFX9: [[FMUL2:%[0-9]+]]:_(s32) = G_FMUL [[FMUL1]], [[C3]] + ; GFX9: [[INTRINSIC_TRUNC:%[0-9]+]]:_(s32) = G_INTRINSIC_TRUNC [[FMUL2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0xC1F0000000000000 + ; GFX9: [[FMUL3:%[0-9]+]]:_(s32) = G_FMUL [[INTRINSIC_TRUNC]], [[C4]] + ; GFX9: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[FMUL3]], [[FMUL1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FADD1]](s32) + ; GFX9: 
[[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[INTRINSIC_TRUNC]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64) + ; GFX9: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV10]], [[UV12]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV11]], [[UV13]], [[USUBO1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[FPTOUI]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[FPTOUI1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[MUL]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[ADD1]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[UADDO4:%[0-9]+]]:_(s32), [[UADDO5:%[0-9]+]]:_(s1) = G_UADDO [[MUL3]], [[MUL4]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO5]](s1) + ; GFX9: [[UADDO6:%[0-9]+]]:_(s32), [[UADDO7:%[0-9]+]]:_(s1) = G_UADDO [[UADDO4]], [[UMULH1]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO7]](s1) + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[ADD1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[ADD1]] + ; GFX9: [[UADDO8:%[0-9]+]]:_(s32), [[UADDO9:%[0-9]+]]:_(s1) = G_UADDO [[MUL5]], [[UMULH2]] + ; GFX9: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO9]](s1) + ; GFX9: [[UADDO10:%[0-9]+]]:_(s32), [[UADDO11:%[0-9]+]]:_(s1) = G_UADDO [[UADDO8]], [[UMULH3]] + ; GFX9: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO11]](s1) + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ZEXT2]], [[ZEXT3]] + ; 
GFX9: [[UADDO12:%[0-9]+]]:_(s32), [[UADDO13:%[0-9]+]]:_(s1) = G_UADDO [[UADDO10]], [[ADD2]] + ; GFX9: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO13]](s1) + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[ZEXT4]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[ADD1]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UMULH4]], [[ADD4]] + ; GFX9: [[UADDO14:%[0-9]+]]:_(s32), [[UADDO15:%[0-9]+]]:_(s1) = G_UADDO [[FPTOUI]], [[UADDO12]] + ; GFX9: [[UADDE4:%[0-9]+]]:_(s32), [[UADDE5:%[0-9]+]]:_(s1) = G_UADDE [[FPTOUI1]], [[ADD5]], [[UADDO15]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[ADD5]] + ; GFX9: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDO14]] + ; GFX9: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[USUBE]], [[UADDO14]] + ; GFX9: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[USUBO]], [[UADDE4]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[USUBO]], [[UADDO14]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[MUL7]], [[MUL8]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[UMULH5]] + ; GFX9: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[MUL6]] + ; GFX9: [[MUL10:%[0-9]+]]:_(s32) = G_MUL [[UADDO14]], [[ADD8]] + ; GFX9: [[UMULH6:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[MUL6]] + ; GFX9: [[UADDO16:%[0-9]+]]:_(s32), [[UADDO17:%[0-9]+]]:_(s1) = G_UADDO [[MUL9]], [[MUL10]] + ; GFX9: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO17]](s1) + ; GFX9: [[UADDO18:%[0-9]+]]:_(s32), [[UADDO19:%[0-9]+]]:_(s1) = G_UADDO [[UADDO16]], [[UMULH6]] + ; GFX9: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO19]](s1) + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ZEXT5]], [[ZEXT6]] + ; GFX9: [[MUL11:%[0-9]+]]:_(s32) = G_MUL [[UADDE4]], [[ADD8]] + ; GFX9: [[UMULH7:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[MUL6]] + ; GFX9: [[UMULH8:%[0-9]+]]:_(s32) = G_UMULH [[UADDO14]], [[ADD8]] + ; GFX9: [[UADDO20:%[0-9]+]]:_(s32), [[UADDO21:%[0-9]+]]:_(s1) = G_UADDO [[MUL11]], [[UMULH7]] + ; GFX9: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO21]](s1) + ; GFX9: [[UADDO22:%[0-9]+]]:_(s32), [[UADDO23:%[0-9]+]]:_(s1) = 
G_UADDO [[UADDO20]], [[UMULH8]] + ; GFX9: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO23]](s1) + ; GFX9: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ZEXT7]], [[ZEXT8]] + ; GFX9: [[UADDO24:%[0-9]+]]:_(s32), [[UADDO25:%[0-9]+]]:_(s1) = G_UADDO [[UADDO22]], [[ADD9]] + ; GFX9: [[ZEXT9:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO25]](s1) + ; GFX9: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD10]], [[ZEXT9]] + ; GFX9: [[UMULH9:%[0-9]+]]:_(s32) = G_UMULH [[UADDE4]], [[ADD8]] + ; GFX9: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UMULH9]], [[ADD11]] + ; GFX9: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[UADDO26:%[0-9]+]]:_(s32), [[UADDO27:%[0-9]+]]:_(s1) = G_UADDO [[UADDO14]], [[UADDO24]] + ; GFX9: [[UADDE6:%[0-9]+]]:_(s32), [[UADDE7:%[0-9]+]]:_(s1) = G_UADDE [[ADD6]], [[ADD12]], [[UADDO15]] + ; GFX9: [[UADDE8:%[0-9]+]]:_(s32), [[UADDE9:%[0-9]+]]:_(s1) = G_UADDE [[UADDE6]], [[C6]], [[UADDO27]] + ; GFX9: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR]](s64) + ; GFX9: [[MUL12:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDO26]] + ; GFX9: [[MUL13:%[0-9]+]]:_(s32) = G_MUL [[UV16]], [[UADDE8]] + ; GFX9: [[UMULH10:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDO26]] + ; GFX9: [[UADDO28:%[0-9]+]]:_(s32), [[UADDO29:%[0-9]+]]:_(s1) = G_UADDO [[MUL12]], [[MUL13]] + ; GFX9: [[ZEXT10:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO29]](s1) + ; GFX9: [[UADDO30:%[0-9]+]]:_(s32), [[UADDO31:%[0-9]+]]:_(s1) = G_UADDO [[UADDO28]], [[UMULH10]] + ; GFX9: [[ZEXT11:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO31]](s1) + ; GFX9: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ZEXT10]], [[ZEXT11]] + ; GFX9: [[MUL14:%[0-9]+]]:_(s32) = G_MUL [[UV17]], [[UADDE8]] + ; GFX9: [[UMULH11:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDO26]] + ; GFX9: [[UMULH12:%[0-9]+]]:_(s32) = G_UMULH [[UV16]], [[UADDE8]] + ; GFX9: [[UADDO32:%[0-9]+]]:_(s32), [[UADDO33:%[0-9]+]]:_(s1) = G_UADDO [[MUL14]], [[UMULH11]] + ; GFX9: [[ZEXT12:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO33]](s1) + ; GFX9: 
[[UADDO34:%[0-9]+]]:_(s32), [[UADDO35:%[0-9]+]]:_(s1) = G_UADDO [[UADDO32]], [[UMULH12]] + ; GFX9: [[ZEXT13:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO35]](s1) + ; GFX9: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ZEXT12]], [[ZEXT13]] + ; GFX9: [[UADDO36:%[0-9]+]]:_(s32), [[UADDO37:%[0-9]+]]:_(s1) = G_UADDO [[UADDO34]], [[ADD13]] + ; GFX9: [[ZEXT14:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO37]](s1) + ; GFX9: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[ZEXT14]] + ; GFX9: [[UMULH13:%[0-9]+]]:_(s32) = G_UMULH [[UV17]], [[UADDE8]] + ; GFX9: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UMULH13]], [[ADD15]] + ; GFX9: [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[MUL15:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[UADDO36]] + ; GFX9: [[MUL16:%[0-9]+]]:_(s32) = G_MUL [[UV19]], [[UADDO36]] + ; GFX9: [[MUL17:%[0-9]+]]:_(s32) = G_MUL [[UV18]], [[ADD16]] + ; GFX9: [[UMULH14:%[0-9]+]]:_(s32) = G_UMULH [[UV18]], [[UADDO36]] + ; GFX9: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[MUL16]], [[MUL17]] + ; GFX9: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[UMULH14]] + ; GFX9: [[USUBO2:%[0-9]+]]:_(s32), [[USUBO3:%[0-9]+]]:_(s1) = G_USUBO [[UV14]], [[MUL15]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV15]], [[ADD18]], [[USUBO3]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UV15]], [[ADD18]] + ; GFX9: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO2]](s32), [[USUBE2]](s32) + ; GFX9: [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR1]](s64) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ICMP]](s1) + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO2]](s32), [[UV20]] + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[ICMP1]](s1) + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE2]](s32), [[UV21]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SEXT1]], [[SEXT]] + ; GFX9: [[USUBO4:%[0-9]+]]:_(s32), [[USUBO5:%[0-9]+]]:_(s1) = 
G_USUBO [[USUBO2]], [[UV20]] + ; GFX9: [[USUBE4:%[0-9]+]]:_(s32), [[USUBE5:%[0-9]+]]:_(s1) = G_USUBE [[SUB]], [[UV21]], [[USUBO3]] + ; GFX9: [[USUBE6:%[0-9]+]]:_(s32), [[USUBE7:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[C6]], [[USUBO5]] + ; GFX9: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO4]](s32), [[USUBE6]](s32) + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SEXT2:%[0-9]+]]:_(s32) = G_SEXT [[ICMP3]](s1) + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[USUBO4]](s32), [[UV20]] + ; GFX9: [[SEXT3:%[0-9]+]]:_(s32) = G_SEXT [[ICMP4]](s1) + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[USUBE6]](s32), [[UV21]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SEXT3]], [[SEXT2]] + ; GFX9: [[USUBO6:%[0-9]+]]:_(s32), [[USUBO7:%[0-9]+]]:_(s1) = G_USUBO [[USUBO4]], [[UV20]] + ; GFX9: [[USUBE8:%[0-9]+]]:_(s32), [[USUBE9:%[0-9]+]]:_(s1) = G_USUBE [[USUBE4]], [[UV21]], [[USUBO5]] + ; GFX9: [[USUBE10:%[0-9]+]]:_(s32), [[USUBE11:%[0-9]+]]:_(s1) = G_USUBE [[USUBE8]], [[C6]], [[USUBO7]] + ; GFX9: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO6]](s32), [[USUBE10]](s32) + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT1]](s32), [[C6]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[MV4]], [[MV3]] + ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SELECT]](s32), [[C6]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[SELECT2]], [[MV2]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[XOR2]](s64) + ; GFX9: [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX9: [[USUBO8:%[0-9]+]]:_(s32), [[USUBO9:%[0-9]+]]:_(s1) = G_USUBO [[UV22]], [[UV24]] + ; GFX9: [[USUBE12:%[0-9]+]]:_(s32), [[USUBE13:%[0-9]+]]:_(s1) = G_USUBE [[UV23]], [[UV25]], [[USUBO9]] + ; GFX9: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[USUBO8]](s32), 
[[USUBE12]](s32) + ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[MV5]](s64) + ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -0,0 +1,3783 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. + +define i64 @v_sdiv_i64(i64 %num, i64 %den) { +; CHECK-LABEL: v_sdiv_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_or_b32_e32 v5, v1, v3 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v0, v4 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v2, v5 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CHECK-NEXT: v_xor_b32_e32 v8, v4, v5 +; CHECK-NEXT: v_xor_b32_e32 v6, v6, v4 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v4 +; CHECK-NEXT: v_xor_b32_e32 v4, v7, v5 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v5 +; 
CHECK-NEXT: v_cvt_f32_u32_e32 v5, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v7, v3 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v4 +; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc +; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CHECK-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CHECK-NEXT: v_trunc_f32_e32 v7, v7 +; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v5 +; CHECK-NEXT: v_mul_hi_u32 v14, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CHECK-NEXT: v_mul_lo_u32 v13, v7, v12 +; CHECK-NEXT: v_mul_hi_u32 v15, v5, v12 +; CHECK-NEXT: v_mul_hi_u32 v12, v7, v12 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CHECK-NEXT: v_mul_lo_u32 v14, v5, v11 +; CHECK-NEXT: v_mul_lo_u32 v16, v7, v11 +; CHECK-NEXT: v_mul_hi_u32 v17, v5, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v11 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CHECK-NEXT: v_addc_u32_e64 v12, s[4:5], v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v10, v5 +; CHECK-NEXT: v_mul_hi_u32 
v13, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v9, v12 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v11 +; CHECK-NEXT: v_mul_hi_u32 v15, v5, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v9 +; CHECK-NEXT: v_mul_lo_u32 v13, v12, v9 +; CHECK-NEXT: v_mul_hi_u32 v16, v5, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v12, v9 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v14, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CHECK-NEXT: v_add_i32_e64 v12, s[4:5], v13, v14 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v11 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v1, v7 +; CHECK-NEXT: v_mul_hi_u32 v13, v6, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v1, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; 
CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v12, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_mul_lo_u32 v9, v4, v7 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, 1, v5 +; CHECK-NEXT: v_addc_u32_e32 v14, vcc, 0, v7, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, 1, v13 +; CHECK-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v10 +; CHECK-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v4 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v6, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v13, v11, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v14, v15, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v8 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v1, v8 +; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v3, v8, 
vcc +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v1 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v4, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: BB0_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] +; CHECK-NEXT: v_mov_b32_e32 v0, v4 +; CHECK-NEXT: v_mov_b32_e32 v1, v5 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv i64 %num, %den + ret i64 %result +} + +; FIXME: This is a workaround for not handling uniform VGPR case. 
+declare i32 @llvm.amdgcn.readfirstlane(i32) + +define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) { +; CHECK-LABEL: s_sdiv_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3] +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, -1 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; CHECK-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 +; CHECK-NEXT: s_xor_b64 vcc, s[4:5], s[6:7] +; CHECK-NEXT: s_mov_b32 s4, 1 +; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CHECK-NEXT: s_cbranch_vccz BB1_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: s_ashr_i32 s4, s1, 31 +; CHECK-NEXT: s_ashr_i32 s10, s3, 31 +; CHECK-NEXT: s_add_u32 s6, s0, s4 +; CHECK-NEXT: s_cselect_b32 s5, 1, 0 +; CHECK-NEXT: s_and_b32 s5, s5, 1 +; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_addc_u32 s7, s1, s4 +; CHECK-NEXT: s_add_u32 s12, s2, s10 +; CHECK-NEXT: s_cselect_b32 s1, 1, 0 +; CHECK-NEXT: s_and_b32 s1, s1, 1 +; CHECK-NEXT: s_mov_b32 s5, s4 +; CHECK-NEXT: s_mov_b32 s11, s10 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_addc_u32 s13, s3, s10 +; CHECK-NEXT: s_xor_b64 s[8:9], s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], s[12:13], s[10:11] +; CHECK-NEXT: v_mov_b32_e32 v0, s9 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s7 +; CHECK-NEXT: s_sub_u32 s1, 0, s6 +; CHECK-NEXT: s_cselect_b32 s3, 1, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, s7 +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; CHECK-NEXT: s_and_b32 s3, s3, 1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: s_cmp_lg_u32 s3, 0 +; CHECK-NEXT: s_subb_u32 s3, 0, s7 +; CHECK-NEXT: s_xor_b64 s[10:11], s[4:5], s[10:11] +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v2, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v4, s1, v2 +; CHECK-NEXT: 
v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s1, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v2, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: 
v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, s9, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, s8, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s9, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s8, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s9, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s8, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s9, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s6, v1 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8 +; CHECK-NEXT: v_mul_lo_u32 v2, s6, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s8, v5 +; CHECK-NEXT: v_subb_u32_e64 v0, s[4:5], v0, v2, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, 
s[4:5], s9, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s7, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s7, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v6, vcc +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s6, v5 +; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s7, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s7, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v8, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, s10, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0 +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: BB1_2: ; %Flow +; CHECK-NEXT: s_and_b32 s1, s4, 1 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc0 BB1_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0 +; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v1 +; CHECK-NEXT: 
v_cmp_ge_u32_e32 vcc, s0, v1 +; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v4 +; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: BB1_4: +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: ; return to shader part epilog + %result = sdiv i64 %num, %den + %cast = bitcast i64 %result to <2 x i32> + %elt.0 = extractelement <2 x i32> %cast, i32 0 + %elt.1 = extractelement <2 x i32> %cast, i32 1 + %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) + %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) + %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 + %cast.back = bitcast <2 x i32> %ins.1 to i64 + ret i64 %cast.back +} + +define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_sdiv_v2i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_xor_b32_e32 v4, v4, v9 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v9 +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 +; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 +; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10 +; GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, 
v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v10 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_mul_lo_u32 v16, v11, v14 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v18, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v11, v15 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v14 +; GISEL-NEXT: v_mul_hi_u32 v18, v10, v15 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v11, v15, vcc +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v13, v10 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v14 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v16 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v16, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; 
GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v15, v14, v15 +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v17 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, 0, v11, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v12, v1, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v0, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v0, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v1, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v14, v0, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v1, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 
v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 +; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v1, v13, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v13 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, v14, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v10 +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v11, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v0, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v14, v4, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v8, v9 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v5, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, 
v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 +; GISEL-NEXT: v_xor_b32_e32 v7, v7, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v7 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 +; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: 
v_mul_lo_u32 v13, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v2, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v12, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: 
v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v11, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v6, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; GISEL-NEXT: 
v_xor_b32_e32 v4, v4, v5 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i64: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_or_b32_e32 v9, v1, v5 +; CGP-NEXT: v_mov_b32_e32 v8, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr8_vgpr9 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v0, v8 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v9 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v8, v9 +; CGP-NEXT: v_xor_b32_e32 v10, v10, v8 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v8 +; CGP-NEXT: v_xor_b32_e32 v8, v11, v9 +; CGP-NEXT: v_xor_b32_e32 v5, v5, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v8 +; CGP-NEXT: v_cvt_f32_u32_e32 v11, v5 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v8 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v11 +; CGP-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; CGP-NEXT: v_mul_f32_e32 v9, 0x5f7ffffc, v9 +; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v9 +; CGP-NEXT: v_trunc_f32_e32 v11, v11 +; CGP-NEXT: v_mac_f32_e32 v9, 0xcf800000, v11 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v11 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v9 +; CGP-NEXT: v_mul_hi_u32 v18, v13, v9 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 +; CGP-NEXT: v_mul_lo_u32 v17, v11, v16 +; CGP-NEXT: v_mul_hi_u32 v19, v9, v16 +; CGP-NEXT: 
v_mul_hi_u32 v16, v11, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_mul_lo_u32 v18, v9, v15 +; CGP-NEXT: v_mul_lo_u32 v20, v11, v15 +; CGP-NEXT: v_mul_hi_u32 v21, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v15 +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v20, v16 +; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v21 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v18, vcc, v20, v19 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v16 +; CGP-NEXT: v_addc_u32_e64 v16, s[4:5], v11, v15, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v14, v9 +; CGP-NEXT: v_mul_hi_u32 v17, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v13, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v16, v13 +; CGP-NEXT: v_mul_hi_u32 v20, v9, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v16, v13 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 +; 
CGP-NEXT: v_add_i32_e64 v16, s[4:5], v17, v18 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v1, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v1, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v17 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v16, v8, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v11 +; CGP-NEXT: v_add_i32_e32 v17, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v17 +; CGP-NEXT: v_addc_u32_e32 v19, vcc, 0, v18, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_subb_u32_e64 v14, s[4:5], v1, v13, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v13 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v14, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v14, v5 +; CGP-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v8 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v8 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v8, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v17, v15, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v18, v19, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v11, v5, vcc +; CGP-NEXT: v_xor_b32_e32 v1, v1, v12 +; CGP-NEXT: v_xor_b32_e32 v5, v5, v12 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v1, v12 +; CGP-NEXT: v_subb_u32_e32 v9, vcc, v5, v12, vcc +; CGP-NEXT: BB2_2: ; %Flow2 +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CGP-NEXT: v_mul_lo_u32 v5, v1, v4 +; CGP-NEXT: v_mul_hi_u32 v8, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v5 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v5, v5, v1 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v1, v5 +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v8, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v1, v4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v1 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v1 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v5 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v1, v8, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v0, vcc +; CGP-NEXT: v_mov_b32_e32 v9, 0 +; CGP-NEXT: BB2_4: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_or_b32_e32 v1, v3, v7 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v7 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v2, v0 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v1 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v0, v1 +; CGP-NEXT: v_xor_b32_e32 v4, v4, v0 +; CGP-NEXT: v_xor_b32_e32 v0, v3, v0 +; CGP-NEXT: v_xor_b32_e32 v3, v5, v1 +; CGP-NEXT: v_xor_b32_e32 v1, v7, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v1 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v3 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v1, vcc +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v5 +; CGP-NEXT: v_mul_hi_u32 v16, v11, v5 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v7, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v7, v14 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 
+; CGP-NEXT: v_mul_lo_u32 v16, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v18, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v7, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], v7, v13, vcc +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v5 +; CGP-NEXT: v_mul_lo_u32 v12, v12, v5 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v5 +; CGP-NEXT: v_mul_lo_u32 v11, v11, v14 +; CGP-NEXT: v_mul_lo_u32 v16, v14, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v5, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v18, v5, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v14, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v16 +; CGP-NEXT: v_add_i32_e64 v12, 
s[4:5], v13, v12 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v4, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v0, v5 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v15, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v0, v7 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v3, v5 +; CGP-NEXT: v_mul_lo_u32 v13, v1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, v3, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v11, v3, v7 +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v5 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v7, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v12 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v0, v11, vcc +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 
v0, vcc, v0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v1 +; CGP-NEXT: v_cndmask_b32_e32 v11, v14, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v3 +; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v3, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v15, v13, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v10 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v10 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc +; CGP-NEXT: BB2_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v3, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v2 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v2, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 
v0, v0, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB2_8: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_mov_b32_e32 v3, v1 +; CGP-NEXT: v_mov_b32_e32 v2, v0 +; CGP-NEXT: v_mov_b32_e32 v0, v8 +; CGP-NEXT: v_mov_b32_e32 v1, v9 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = sdiv <2 x i64> %num, %den + ret <2 x i64> %result +} + +define i64 @v_sdiv_i64_pow2k_denom(i64 %num) { +; CHECK-LABEL: v_sdiv_i64_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; 
CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, s7, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v6, v5 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v6, v6, v7 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v10 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v1, 
v3 +; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v4 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v3 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 1, v9 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; 
CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv i64 %num, 4096 + ret i64 %result +} + +define <2 x i64> @v_sdiv_v2i64_pow2k_denom(<2 x i64> %num) { +; GISEL-LABEL: v_sdiv_v2i64_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s8, 0x1000 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: s_mov_b32 s4, 0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; GISEL-NEXT: s_mov_b32 s5, s4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: s_xor_b64 s[10:11], s[6:7], s[4:5] +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, s11 +; GISEL-NEXT: s_sub_u32 s12, 0, s10 +; GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GISEL-NEXT: v_mov_b32_e32 v8, s11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GISEL-NEXT: s_and_b32 s6, s6, 1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; 
GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; GISEL-NEXT: s_subb_u32 s13, 0, s11 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v7 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v11, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v12, s12, v6 +; GISEL-NEXT: s_xor_b64 s[8:9], s[6:7], s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v14, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v15, s9 +; GISEL-NEXT: s_sub_u32 s14, 0, s8 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_mac_f32_e32 v14, 0x4f800000, v15 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v14, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v14, 0x5f7ffffc, v14 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s15, 0, s9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v16, 0x2f800000, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: 
v_add_i32_e32 v12, vcc, v15, v13 +; GISEL-NEXT: v_trunc_f32_e32 v13, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_mac_f32_e32 v14, 0xcf800000, v13 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, s15, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, s14, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v7, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, s12, v6 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, s12, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v17, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v16, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, 
s[6:7] +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v19, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_mov_b32_e32 v19, s9 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v17 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v16 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v17 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, s15, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, s14, v11 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v14 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v11, v15 +; GISEL-NEXT: 
v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v17, v1, v7 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_mul_hi_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v17, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v16, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v17, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s11, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, s10, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v15 +; GISEL-NEXT: v_mul_lo_u32 v12, s10, v7 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v6 +; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; 
GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v14, v3, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v2, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v12, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v2, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v18 +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s11, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s11, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v9 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_mul_hi_u32 v8, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s11, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s11, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; 
GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v14 +; GISEL-NEXT: v_mul_lo_u32 v10, s8, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, s9, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, s8, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, s8, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], 1, v8 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v12 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v13, vcc +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v3, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v6, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v7, vcc +; GISEL-NEXT: 
v_cndmask_b32_e32 v3, v13, v11, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i64_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s10, 0x1000 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s8, 0xfffff000 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s10 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_mov_b32_e32 v8, v7 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v8, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v9 +; CGP-NEXT: v_mul_lo_u32 v12, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v15, 
s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v7, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v9, v15 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v19 +; CGP-NEXT: v_mul_lo_u32 v16, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_mul_lo_u32 v13, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v16, s[4:5], v16, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 
+; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], v8, v10, vcc +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v9, v11, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v18, s8, v13 +; CGP-NEXT: v_mul_lo_u32 v19, v13, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v16 +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; CGP-NEXT: v_mul_lo_u32 v17, s8, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v12, v10 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v12, v10 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v11 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v14 +; CGP-NEXT: v_mul_hi_u32 v12, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 +; CGP-NEXT: v_mul_lo_u32 v18, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v7, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: 
v_add_i32_e64 v11, s[8:9], v18, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v10, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v18, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v15, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v17 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v14 +; CGP-NEXT: v_add_i32_e64 v13, s[6:7], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v9, vcc, v9, v13, s[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_mul_lo_u32 v17, v0, v9 +; CGP-NEXT: v_mul_lo_u32 v18, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v19, v0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v1, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v15, v5 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v18, 
v7 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v19 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, s10, v5 +; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s10, v5 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v13, s10, v7 +; CGP-NEXT: v_mul_lo_u32 v16, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s10, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v10, s10, v8 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v5 +; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v19, s10, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v19 +; CGP-NEXT: v_add_i32_e32 v16, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v19, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v11 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v18, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v3, v10, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v10 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v0, 
s[4:5], v0, v13 +; CGP-NEXT: v_subb_u32_e64 v13, s[6:7], v1, v14, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v14 +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12 +; CGP-NEXT: v_cmp_le_u32_e64 s[8:9], s10, v0 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[8:9] +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, v14, v10, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s10, v2 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v12, v14, v12, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], 1, v16 +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v19, s[6:7] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_subrev_i32_e64 v0, s[4:5], s10, v0 +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v15, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, v16, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v18, v17, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v19, v14, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v12 +; CGP-NEXT: v_cndmask_b32_e64 v0, 
v7, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v5, v1, v6 +; CGP-NEXT: v_cndmask_b32_e64 v1, v9, v3, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v3, v2, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = sdiv <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 @v_sdiv_i64_oddk_denom(i64 %num) { +; CHECK-LABEL: v_sdiv_i64_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 
+; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, s7, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v6, v5 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v6, v6, v7 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v10 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 
v4, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s6, v4 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v3 +; CHECK-NEXT: v_addc_u32_e32 v10, vcc, 0, v4, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, 1, v9 +; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v10, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_subb_u32_e64 v6, s[4:5], v1, v5, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; 
CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = sdiv i64 %num, 1235195 + ret i64 %result +} + +define <2 x i64> @v_sdiv_v2i64_oddk_denom(<2 x i64> %num) { +; GISEL-LABEL: v_sdiv_v2i64_oddk_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: s_mov_b32 s4, 0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; GISEL-NEXT: s_mov_b32 s5, s4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: s_xor_b64 s[10:11], s[6:7], s[4:5] +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, s11 +; GISEL-NEXT: s_sub_u32 s12, 0, s10 +; GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GISEL-NEXT: 
v_mov_b32_e32 v8, s11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GISEL-NEXT: s_and_b32 s6, s6, 1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; GISEL-NEXT: s_subb_u32 s13, 0, s11 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v7 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v11, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v12, s12, v6 +; GISEL-NEXT: s_xor_b64 s[8:9], s[6:7], s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v14, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v15, s9 +; GISEL-NEXT: s_sub_u32 s14, 0, s8 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_mac_f32_e32 v14, 0x4f800000, v15 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v14, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v14, 0x5f7ffffc, v14 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s15, 0, s9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: 
v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v16, 0x2f800000, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v13 +; GISEL-NEXT: v_trunc_f32_e32 v13, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_mac_f32_e32 v14, 0xcf800000, v13 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, s15, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, s14, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v7, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, s12, v6 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, s12, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v17, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], 
v17, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v16, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v19, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_mov_b32_e32 v19, s9 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v17 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v16 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v17 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, s15, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, s14, v11 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v14 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v0, v6 
+; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v17, v1, v7 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_mul_hi_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v17, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v16, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v17, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s11, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, s10, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v15 +; GISEL-NEXT: v_mul_lo_u32 v12, s10, v7 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v6 +; GISEL-NEXT: 
v_addc_u32_e32 v16, vcc, 0, v7, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v14, v3, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v2, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v12, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v2, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v18 +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s11, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], s11, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v9 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_mul_hi_u32 v8, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v12, v18, v12, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s11, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v14 +; 
GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s11, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v0, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v14 +; GISEL-NEXT: v_mul_lo_u32 v10, s8, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, s9, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, s8, v8 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, s8, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], 1, v8 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], 0, v9, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v12 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, 0, v13, vcc +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v3, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v6, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v3 +; GISEL-NEXT: 
v_cndmask_b32_e32 v2, v6, v2, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v11, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i64_oddk_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s8, 0xffed2705 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s10 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_mov_b32_e32 v8, v7 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v8, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v9 +; 
CGP-NEXT: v_mul_lo_u32 v12, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v15, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v7, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v9, v15 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v19 +; CGP-NEXT: v_mul_lo_u32 v16, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_mul_lo_u32 v13, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v16, s[4:5], v16, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, 
vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], v8, v10, vcc +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v9, v11, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v18, s8, v13 +; CGP-NEXT: v_mul_lo_u32 v19, v13, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v16 +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; CGP-NEXT: v_mul_lo_u32 v17, s8, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v12, v10 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v12, v10 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v11 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v14 +; CGP-NEXT: v_mul_hi_u32 v12, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 +; CGP-NEXT: v_mul_lo_u32 v18, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v16 +; CGP-NEXT: 
v_mul_hi_u32 v16, v7, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v11, s[8:9], v18, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v10, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v18, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v15, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v17 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v14 +; CGP-NEXT: v_add_i32_e64 v13, s[6:7], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v9, vcc, v9, v13, s[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_mul_lo_u32 v17, v0, v9 +; CGP-NEXT: v_mul_lo_u32 v18, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v19, v0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v1, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v15, v5 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, 
vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v18, v7 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v19 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, s10, v5 +; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s10, v5 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v13, s10, v7 +; CGP-NEXT: v_mul_lo_u32 v16, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s10, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v10, s10, v8 +; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v5 +; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v19, s10, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v19 +; CGP-NEXT: v_add_i32_e32 v16, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v19, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v11 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v18, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v3, v10, vcc +; CGP-NEXT: v_sub_i32_e64 v3, 
s[4:5], v3, v10 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13 +; CGP-NEXT: v_subb_u32_e64 v13, s[6:7], v1, v14, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v14 +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v12 +; CGP-NEXT: v_cmp_le_u32_e64 s[8:9], s10, v0 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[8:9] +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cndmask_b32_e64 v10, v14, v10, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v2, vcc, s10, v2 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v2 +; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v12, v14, v12, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], 1, v16 +; CGP-NEXT: v_addc_u32_e64 v14, s[6:7], 0, v19, s[6:7] +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_subrev_i32_e64 v0, s[4:5], s10, v0 +; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5] +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, 0, -1, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v11, v15, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 +; CGP-NEXT: v_cndmask_b32_e64 v0, v16, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v18, v17, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v1, 
v5, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v19, v14, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v12 +; CGP-NEXT: v_cndmask_b32_e64 v0, v7, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; CGP-NEXT: v_xor_b32_e32 v5, v1, v6 +; CGP-NEXT: v_cndmask_b32_e64 v1, v9, v3, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v3, v2, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = sdiv <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) { +; CHECK-LABEL: v_sdiv_i64_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0 +; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 +; CHECK-NEXT: v_or_b32_e32 v7, v1, v5 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB7_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v0, v2 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v4, v3 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v3, vcc +; CHECK-NEXT: v_xor_b32_e32 v8, v2, v3 +; CHECK-NEXT: v_xor_b32_e32 v6, v6, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_xor_b32_e32 v2, v7, v3 +; CHECK-NEXT: v_xor_b32_e32 v3, v5, v3 +; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v7, v3 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v2 +; 
CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc +; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v7 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CHECK-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5 +; CHECK-NEXT: v_trunc_f32_e32 v7, v7 +; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7 +; CHECK-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v5 +; CHECK-NEXT: v_mul_hi_u32 v14, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CHECK-NEXT: v_mul_lo_u32 v13, v7, v12 +; CHECK-NEXT: v_mul_hi_u32 v15, v5, v12 +; CHECK-NEXT: v_mul_hi_u32 v12, v7, v12 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CHECK-NEXT: v_mul_lo_u32 v14, v5, v11 +; CHECK-NEXT: v_mul_lo_u32 v16, v7, v11 +; CHECK-NEXT: v_mul_hi_u32 v17, v5, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v7, v11 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CHECK-NEXT: v_addc_u32_e64 v12, s[4:5], v7, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v10, v5 +; CHECK-NEXT: v_mul_hi_u32 v13, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v9, v12 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v11 +; CHECK-NEXT: v_mul_hi_u32 
v15, v5, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v9 +; CHECK-NEXT: v_mul_lo_u32 v13, v12, v9 +; CHECK-NEXT: v_mul_hi_u32 v16, v5, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v12, v9 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v14, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CHECK-NEXT: v_add_i32_e64 v12, s[4:5], v13, v14 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v11 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v12, v1, v7 +; CHECK-NEXT: v_mul_hi_u32 v13, v6, v7 +; CHECK-NEXT: v_mul_hi_u32 v7, v1, v7 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_cndmask_b32_e64 
v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_mul_lo_u32 v10, v2, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v12, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v7 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, 1, v5 +; CHECK-NEXT: v_addc_u32_e32 v14, vcc, 0, v7, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, 1, v13 +; CHECK-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v10 +; CHECK-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v2 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v2, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v13, v11, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v14, v15, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v2, vcc +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v8 +; CHECK-NEXT: v_xor_b32_e32 v3, v2, v8 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v1, v8 +; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v3, v8, vcc +; CHECK-NEXT: BB7_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, 
s[6:7] +; CHECK-NEXT: s_cbranch_execz BB7_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: BB7_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] +; CHECK-NEXT: v_mov_b32_e32 v0, v2 +; CHECK-NEXT: v_mov_b32_e32 v1, v3 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i64 4096, %y + %r = sdiv i64 %x, %shl.y + ret i64 %r +} + +define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { +; GISEL-LABEL: v_sdiv_v2i64_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: s_mov_b32 s5, 0 +; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 +; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, 
vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_xor_b32_e32 v4, v4, v9 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v9 +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 +; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10 +; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v10 +; GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v10 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_mul_lo_u32 v16, v11, v14 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v18, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_mul_lo_u32 v17, v11, v15 +; GISEL-NEXT: v_mul_hi_u32 v14, v11, v14 +; GISEL-NEXT: v_mul_hi_u32 v18, v10, v15 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; 
GISEL-NEXT: v_addc_u32_e64 v14, s[4:5], v11, v15, vcc +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v13, v10 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v14 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v16 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v14, v15 +; GISEL-NEXT: v_mul_lo_u32 v16, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v15 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v14, v12 +; GISEL-NEXT: v_mul_hi_u32 v15, v14, v15 +; GISEL-NEXT: v_mul_hi_u32 v17, v10, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v17 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; GISEL-NEXT: v_mul_hi_u32 v12, v14, v12 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v12, vcc +; GISEL-NEXT: v_addc_u32_e64 v11, vcc, 0, v11, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v12, v1, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v0, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v0, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v1, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 +; 
GISEL-NEXT: v_mul_hi_u32 v14, v0, v11 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v1, v11 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v4, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v5, v10 +; GISEL-NEXT: v_mul_lo_u32 v14, v4, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v4, v10 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v12 +; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v1, v13, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v13 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, v14, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v10 +; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v11, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v0, vcc +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v13 +; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v14, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v14, v4, vcc +; 
GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v8, v9 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v5, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 +; GISEL-NEXT: v_xor_b32_e32 v7, v7, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v7 +; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8 +; GISEL-NEXT: v_trunc_f32_e32 v9, v9 +; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13 +; 
GISEL-NEXT: v_mul_hi_u32 v12, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; GISEL-NEXT: v_mul_lo_u32 v13, v10, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8 +; GISEL-NEXT: v_mul_lo_u32 v14, v10, v12 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v12, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v13 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v12, v13 +; GISEL-NEXT: v_mul_hi_u32 v15, v8, v10 +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v10, v12, v10 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: 
v_add_i32_e64 v8, s[4:5], v8, v11 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 0, v9, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v2, v8 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v12, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v6, v8 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v6, v8 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v11, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v11 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8 +; 
GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v13, v2, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v11 +; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v12, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v3, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v12, v6, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v4, v4, v5 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i64_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: v_mov_b32_e32 v10, 0 +; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v4 +; CGP-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 +; CGP-NEXT: v_or_b32_e32 v11, v1, v9 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v0, v4 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v8, v5 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v5, vcc +; CGP-NEXT: v_xor_b32_e32 v12, v4, v5 +; CGP-NEXT: v_xor_b32_e32 v10, v10, v4 +; CGP-NEXT: v_xor_b32_e32 
v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v4, v11, v5 +; CGP-NEXT: v_xor_b32_e32 v5, v9, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v11, v5 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v4 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v5, vcc +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v11 +; CGP-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; CGP-NEXT: v_mul_f32_e32 v9, 0x5f7ffffc, v9 +; CGP-NEXT: v_mul_f32_e32 v11, 0x2f800000, v9 +; CGP-NEXT: v_trunc_f32_e32 v11, v11 +; CGP-NEXT: v_mac_f32_e32 v9, 0xcf800000, v11 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v11 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v9 +; CGP-NEXT: v_mul_hi_u32 v18, v13, v9 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 +; CGP-NEXT: v_mul_lo_u32 v17, v11, v16 +; CGP-NEXT: v_mul_hi_u32 v19, v9, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v11, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_mul_lo_u32 v18, v9, v15 +; CGP-NEXT: v_mul_lo_u32 v20, v11, v15 +; CGP-NEXT: v_mul_hi_u32 v21, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v15 +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v20, v16 +; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v21 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v18, vcc, v20, v19 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v16 +; CGP-NEXT: v_addc_u32_e64 v16, s[4:5], v11, v15, vcc +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v14, v9 +; 
CGP-NEXT: v_mul_hi_u32 v17, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v13, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v9, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v16, v13 +; CGP-NEXT: v_mul_hi_u32 v20, v9, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v16, v13 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 +; CGP-NEXT: v_add_i32_e64 v16, s[4:5], v17, v18 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v1, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v1, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v10, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v1, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v17 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_add_i32_e32 v14, 
vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v4, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v9 +; CGP-NEXT: v_mul_hi_u32 v16, v4, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v11 +; CGP-NEXT: v_add_i32_e32 v17, vcc, 1, v9 +; CGP-NEXT: v_addc_u32_e32 v18, vcc, 0, v11, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v17 +; CGP-NEXT: v_addc_u32_e32 v19, vcc, 0, v18, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_subb_u32_e64 v14, s[4:5], v1, v13, vcc +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v13 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v14, v5 +; CGP-NEXT: v_cndmask_b32_e32 v13, v16, v13, vcc +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v4 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 +; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v17, v15, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v18, v19, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; CGP-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v1, v1, v12 +; CGP-NEXT: v_xor_b32_e32 v5, v4, v12 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v12 +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v12, vcc +; CGP-NEXT: BB8_2: ; %Flow2 +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; 
CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v1, v8 +; CGP-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CGP-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CGP-NEXT: v_mul_lo_u32 v4, v1, v8 +; CGP-NEXT: v_mul_hi_u32 v5, v1, v8 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v1 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4 +; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v1, v8 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v1 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v1 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v8 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v1, v5, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v4, v9, v0, vcc +; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: BB8_4: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_or_b32_e32 v1, v3, v7 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v3 +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v2, v0 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v6, v1 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v0, v1 +; CGP-NEXT: v_xor_b32_e32 v8, v8, v0 +; CGP-NEXT: v_xor_b32_e32 v0, v3, v0 +; CGP-NEXT: v_xor_b32_e32 v3, v9, v1 +; CGP-NEXT: 
v_xor_b32_e32 v1, v7, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v1 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v3 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v1, vcc +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v9 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v7 +; CGP-NEXT: v_mul_hi_u32 v16, v11, v7 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v9, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v7, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v9, v14 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v7, v13 +; CGP-NEXT: v_mul_lo_u32 v18, v9, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v9, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], v9, v13, vcc +; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v12, v12, v7 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v11, v11, v14 +; 
CGP-NEXT: v_mul_lo_u32 v16, v14, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v14, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v16 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v0, v7 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v9 +; CGP-NEXT: v_mul_lo_u32 v14, v0, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v8, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v0, v9 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 
0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v3, v7 +; CGP-NEXT: v_mul_lo_u32 v13, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v14, v3, v7 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v11, v3, v9 +; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v7 +; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v9, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v15 +; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v0, v11, vcc +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v1 +; CGP-NEXT: v_cndmask_b32_e32 v11, v14, v11, vcc +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v3 +; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v3 +; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v8, v3, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v15, v13, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v16, v17, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v10 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v10 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc +; CGP-NEXT: BB8_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: 
v_cvt_f32_u32_e32 v0, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v3, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v2 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v7, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB8_8: +; CGP-NEXT: s_or_b64 exec, exec, s[6:7] +; CGP-NEXT: v_mov_b32_e32 v3, v1 +; CGP-NEXT: v_mov_b32_e32 v2, v0 +; CGP-NEXT: v_mov_b32_e32 v0, v4 +; CGP-NEXT: v_mov_b32_e32 v1, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i64> , %y + %r = sdiv <2 x i64> %x, %shl.y + ret <2 x i64> %r +} + +define i64 @v_sdiv_i64_24bit(i64 %num, i64 %den) { +; GISEL-LABEL: v_sdiv_i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: 
v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 +; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v2 +; CGP-NEXT: v_xor_b32_e32 v2, v0, v1 +; CGP-NEXT: v_cvt_f32_i32_e32 v0, v0 +; CGP-NEXT: v_cvt_f32_i32_e32 v1, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 30, v2 +; CGP-NEXT: v_rcp_f32_e32 v3, v1 +; CGP-NEXT: v_or_b32_e32 v2, 1, v2 +; CGP-NEXT: v_mul_f32_e32 v3, v0, v3 +; CGP-NEXT: v_trunc_f32_e32 v3, v3 +; CGP-NEXT: v_fma_f32 v0, -v3, v1, v0 +; CGP-NEXT: v_cvt_i32_f32_e32 v3, v3 +; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, |v1| +; CGP-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v3, v0 +; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and i64 %num, 16777215 + %den.mask = and i64 %den, 16777215 + %result = sdiv i64 %num.mask, %den.mask + ret i64 %result +} + +define <2 x i64> @v_sdiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_sdiv_v2i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s10, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v1, s10, v4 +; GISEL-NEXT: 
v_and_b32_e32 v3, s10, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 0, v3 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v4 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v1 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v4, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v3 +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, v5, vcc +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v10 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 +; GISEL-NEXT: v_trunc_f32_e32 v10, v10 +; GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v16 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v16 +; GISEL-NEXT: v_mul_hi_u32 v16, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v14 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; 
GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v19 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v16 +; GISEL-NEXT: v_addc_u32_e64 v16, s[4:5], v10, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, v8, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v8, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, v8, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v16, v17 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v9, v6, v17 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v8 +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v18, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v12, v7 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v7 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v18, v15 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v7 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v20, v7, v15 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v18, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v7, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v18, v20 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v21, v18 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v20, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v20, v9 +; GISEL-NEXT: v_mul_hi_u32 v20, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 
v21, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v20 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v21, v20 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_addc_u32_e64 v9, s[8:9], v11, v15, s[6:7] +; GISEL-NEXT: v_mul_lo_u32 v13, v13, v7 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v9 +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v13, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v7 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v7 +; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v7, v12 +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v13, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v7, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v13, v20 +; GISEL-NEXT: v_and_b32_e32 v0, s10, v0 +; GISEL-NEXT: v_and_b32_e32 v2, s10, v2 +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v10, v14 +; GISEL-NEXT: v_add_i32_e64 v0, s[10:11], 0, v0 +; GISEL-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v11, s[10:11], v11, v15 +; GISEL-NEXT: v_mul_hi_u32 v14, v16, v17 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v19, v17 +; GISEL-NEXT: v_mul_lo_u32 v18, v16, v8 +; GISEL-NEXT: v_mul_hi_u32 v16, v16, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v14, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 
v18, s[4:5], v21, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v7, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v2 +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v17 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v15 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v11, v9, s[6:7] +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v19, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v19, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, v0, v8 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8 +; GISEL-NEXT: v_mul_hi_u32 v17, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_mul_lo_u32 v18, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v12, v19, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v19, v9 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; GISEL-NEXT: v_cndmask_b32_e64 
v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v16, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v12, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, v3, v7 +; GISEL-NEXT: v_mul_lo_u32 v17, v5, v7 +; GISEL-NEXT: v_mul_hi_u32 v18, v3, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v12, v3, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, 1, v6 +; GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v11, s[4:5], v13, v10, vcc +; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v13, v10 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; 
GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v14 +; GISEL-NEXT: v_subb_u32_e64 v14, s[6:7], v19, v12, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v12, s[6:7], v19, v12 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v11, v4 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v14, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[8:9] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v14, v5 +; GISEL-NEXT: v_add_i32_e64 v14, s[10:11], 1, v7 +; GISEL-NEXT: v_addc_u32_e64 v19, s[10:11], 0, v9, s[10:11] +; GISEL-NEXT: v_subb_u32_e32 v10, vcc, v10, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v10, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_add_i32_e64 v0, s[10:11], 1, v15 +; GISEL-NEXT: v_addc_u32_e64 v1, s[10:11], 0, v17, s[10:11] +; GISEL-NEXT: v_cndmask_b32_e64 v13, v18, v13, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v16, s[8:9] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v4 +; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], 1, v14 +; GISEL-NEXT: v_addc_u32_e64 v10, s[6:7], 0, v19, s[6:7] +; GISEL-NEXT: v_subb_u32_e64 v12, s[4:5], v12, v5, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v12, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v16, v16, v18, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v15, v0, vcc +; GISEL-NEXT: 
v_cmp_ne_u32_e64 s[4:5], 0, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v14, v4, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v10, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v3, v9, v3, s[4:5] +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_sdiv_v2i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v2 +; CGP-NEXT: v_and_b32_e32 v2, s4, v4 +; CGP-NEXT: v_and_b32_e32 v3, s4, v6 +; CGP-NEXT: v_xor_b32_e32 v4, v0, v2 +; CGP-NEXT: v_cvt_f32_i32_e32 v0, v0 +; CGP-NEXT: v_cvt_f32_i32_e32 v2, v2 +; CGP-NEXT: v_xor_b32_e32 v5, v1, v3 +; CGP-NEXT: v_cvt_f32_i32_e32 v1, v1 +; CGP-NEXT: v_cvt_f32_i32_e32 v3, v3 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 30, v4 +; CGP-NEXT: v_rcp_f32_e32 v6, v2 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 30, v5 +; CGP-NEXT: v_rcp_f32_e32 v7, v3 +; CGP-NEXT: v_or_b32_e32 v4, 1, v4 +; CGP-NEXT: v_mul_f32_e32 v6, v0, v6 +; CGP-NEXT: v_or_b32_e32 v5, 1, v5 +; CGP-NEXT: v_mul_f32_e32 v7, v1, v7 +; CGP-NEXT: v_trunc_f32_e32 v6, v6 +; CGP-NEXT: v_trunc_f32_e32 v7, v7 +; CGP-NEXT: v_fma_f32 v0, -v6, v2, v0 +; CGP-NEXT: v_cvt_i32_f32_e32 v6, v6 +; CGP-NEXT: v_fma_f32 v1, -v7, v3, v1 +; CGP-NEXT: v_cvt_i32_f32_e32 v7, v7 +; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v0|, |v2| +; CGP-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v1|, |v3| +; CGP-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v0, vcc, v6, v0 +; 
CGP-NEXT: v_add_i32_e32 v1, vcc, v7, v1 +; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 +; CGP-NEXT: v_lshlrev_b32_e32 v1, 7, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 7, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215> + %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215> + %result = sdiv <2 x i64> %num.mask, %den.mask + ret <2 x i64> %result +} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -0,0 +1,3749 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. 
+ +define i64 @v_srem_i64(i64 %num, i64 %den) { +; CHECK-LABEL: v_srem_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_or_b32_e32 v5, v1, v3 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB0_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_ashrrev_i32_e32 v5, 31, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v0, v5 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v2, v4 +; CHECK-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; CHECK-NEXT: v_xor_b32_e32 v6, v6, v5 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v5 +; CHECK-NEXT: v_xor_b32_e32 v7, v7, v4 +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v4 +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v7 +; CHECK-NEXT: v_cvt_f32_u32_e32 v8, v3 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v7 +; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v8 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 +; CHECK-NEXT: v_mul_f32_e32 v8, 0x2f800000, v4 +; CHECK-NEXT: v_trunc_f32_e32 v8, v8 +; CHECK-NEXT: v_mac_f32_e32 v4, 0xcf800000, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 +; CHECK-NEXT: v_mul_lo_u32 v12, v9, v4 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v4 +; CHECK-NEXT: v_mul_hi_u32 v14, v9, v4 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CHECK-NEXT: v_mul_lo_u32 v13, v8, v12 +; CHECK-NEXT: v_mul_hi_u32 v15, v4, v12 +; CHECK-NEXT: v_mul_hi_u32 v12, v8, v12 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CHECK-NEXT: v_mul_lo_u32 v14, v4, v11 +; CHECK-NEXT: v_mul_lo_u32 v16, v8, v11 +; 
CHECK-NEXT: v_mul_hi_u32 v17, v4, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v8, v11 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v12 +; CHECK-NEXT: v_addc_u32_e64 v12, s[4:5], v8, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v4 +; CHECK-NEXT: v_mul_lo_u32 v10, v10, v4 +; CHECK-NEXT: v_mul_hi_u32 v13, v9, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v9, v12 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v11 +; CHECK-NEXT: v_mul_hi_u32 v15, v4, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v9 +; CHECK-NEXT: v_mul_lo_u32 v13, v12, v9 +; CHECK-NEXT: v_mul_hi_u32 v16, v4, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v12, v9 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v14, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CHECK-NEXT: v_add_i32_e64 v12, s[4:5], v13, v14 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], 
v11, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v11 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v6, v8 +; CHECK-NEXT: v_mul_lo_u32 v12, v1, v8 +; CHECK-NEXT: v_mul_hi_u32 v13, v6, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v12, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v4 +; CHECK-NEXT: v_mul_lo_u32 v11, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_lo_u32 v8, v7, v8 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v8, v4 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v10 +; CHECK-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v8, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc +; CHECK-NEXT: 
v_sub_i32_e32 v9, vcc, v6, v7 +; CHECK-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v12, v11, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v9, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v3, v3, v5 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v5 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v3, v5 +; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v1, v5, vcc +; CHECK-NEXT: BB0_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execz BB0_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; 
CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v2 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: BB0_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v0, v4 +; CHECK-NEXT: v_mov_b32_e32 v1, v5 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem i64 %num, %den + ret i64 %result +} + +; FIXME: This is a workaround for not handling uniform VGPR case. +declare i32 @llvm.amdgcn.readfirstlane(i32) + +define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) { +; CHECK-LABEL: s_srem_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3] +; CHECK-NEXT: s_mov_b32 s6, 0 +; CHECK-NEXT: s_mov_b32 s7, -1 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7] +; CHECK-NEXT: v_cmp_eq_u64_e64 s[4:5], s[4:5], 0 +; CHECK-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, 1 +; CHECK-NEXT: s_xor_b64 vcc, s[4:5], s[6:7] +; CHECK-NEXT: s_mov_b32 s4, 1 +; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CHECK-NEXT: s_cbranch_vccz BB1_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: s_ashr_i32 s6, s1, 31 +; CHECK-NEXT: s_ashr_i32 s4, s3, 31 +; CHECK-NEXT: s_add_u32 s8, s0, s6 +; CHECK-NEXT: s_cselect_b32 s5, 1, 0 +; CHECK-NEXT: s_and_b32 s5, s5, 1 +; CHECK-NEXT: s_cmp_lg_u32 s5, 0 +; CHECK-NEXT: s_addc_u32 s9, s1, s6 +; CHECK-NEXT: s_add_u32 s12, s2, s4 +; CHECK-NEXT: s_cselect_b32 s1, 1, 0 +; CHECK-NEXT: s_and_b32 s1, s1, 1 +; CHECK-NEXT: s_mov_b32 s7, s6 +; CHECK-NEXT: s_mov_b32 s5, s4 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_addc_u32 s13, s3, s4 +; CHECK-NEXT: s_xor_b64 s[10:11], s[8:9], s[6:7] +; CHECK-NEXT: s_xor_b64 s[8:9], s[12:13], s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v0, s11 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s8 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s9 +; CHECK-NEXT: s_sub_u32 s1, 0, s8 +; CHECK-NEXT: s_cselect_b32 s3, 1, 0 +; CHECK-NEXT: v_mov_b32_e32 v3, s9 +; CHECK-NEXT: v_mac_f32_e32 v1, 0x4f800000, v2 +; 
CHECK-NEXT: s_and_b32 s3, s3, 1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1 +; CHECK-NEXT: s_cmp_lg_u32 s3, 0 +; CHECK-NEXT: s_subb_u32 s3, 0, s9 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1 +; CHECK-NEXT: v_trunc_f32_e32 v2, v2 +; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v4, s1, v2 +; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s1, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v8, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v4 +; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CHECK-NEXT: v_addc_u32_e64 v5, s[4:5], v2, v4, vcc +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v4, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s3, v1 +; CHECK-NEXT: v_mul_hi_u32 v7, s1, v1 +; CHECK-NEXT: v_mul_lo_u32 v8, s1, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v10, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v5, v4 +; 
CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6 +; CHECK-NEXT: v_mul_lo_u32 v8, v5, v6 +; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v6 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v8, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v9 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v7, v6 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v6 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; CHECK-NEXT: v_mul_lo_u32 v4, s11, v1 +; CHECK-NEXT: v_mul_hi_u32 v5, s10, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s11, v1 +; CHECK-NEXT: v_mul_lo_u32 v6, s10, v2 +; CHECK-NEXT: v_mul_lo_u32 v7, s11, v2 +; CHECK-NEXT: v_mul_hi_u32 v8, s10, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, s11, v2 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CHECK-NEXT: v_mul_lo_u32 v5, s8, v1 +; 
CHECK-NEXT: v_mul_lo_u32 v6, s9, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, s8, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; CHECK-NEXT: v_mul_lo_u32 v2, s8, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s10, v5 +; CHECK-NEXT: v_subb_u32_e64 v0, s[4:5], v0, v1, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s11, v1 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s9, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc +; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s8, v2 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s8, v3 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s9, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s9, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v3, v5, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, s6, v0 +; CHECK-NEXT: v_subrev_i32_e32 v0, vcc, s6, v0 +; CHECK-NEXT: s_mov_b32 s4, 0 +; CHECK-NEXT: BB1_2: ; %Flow +; CHECK-NEXT: s_and_b32 s1, s4, 1 +; CHECK-NEXT: s_cmp_lg_u32 s1, 0 +; CHECK-NEXT: s_cbranch_scc0 BB1_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, 
v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0 +; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 +; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], s2, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[0:1], s0, v0 +; CHECK-NEXT: v_subrev_i32_e64 v0, s[2:3], s2, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; CHECK-NEXT: BB1_4: +; CHECK-NEXT: v_readfirstlane_b32 s0, v0 +; CHECK-NEXT: s_mov_b32 s1, s0 +; CHECK-NEXT: ; return to shader part epilog + %result = srem i64 %num, %den + %cast = bitcast i64 %result to <2 x i32> + %elt.0 = extractelement <2 x i32> %cast, i32 0 + %elt.1 = extractelement <2 x i32> %cast, i32 1 + %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0) + %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1) + %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0 + %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1 + %cast.back = bitcast <2 x i32> %ins.1 to i64 + ret i64 %cast.back +} + +define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_srem_v2i64: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_xor_b32_e32 v4, v4, v9 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v9 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v5 +; GISEL-NEXT: v_mac_f32_e32 
v9, 0x4f800000, v10 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x5f7ffffc, v9 +; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v9 +; GISEL-NEXT: v_trunc_f32_e32 v10, v10 +; GISEL-NEXT: v_mac_f32_e32 v9, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v15, v10, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v9, v14 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_mul_lo_u32 v16, v10, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v10, v13 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], v10, v14, vcc +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v13 +; GISEL-NEXT: v_mul_hi_u32 v11, v11, v9 +; GISEL-NEXT: v_add_i32_e64 
v12, s[4:5], v12, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v16, v9, v14 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v15, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v14 +; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, 0, v10, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v0, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v0, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 
+; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v4, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, v9 +; GISEL-NEXT: v_mul_lo_u32 v10, v4, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v12, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v14, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; 
GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v5, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 +; GISEL-NEXT: v_xor_b32_e32 v5, v7, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v6 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v5 +; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v8 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; GISEL-NEXT: v_trunc_f32_e32 v8, v8 +; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v10, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v13, v8, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v12 +; GISEL-NEXT: v_mul_hi_u32 v15, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v7, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc 
+; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], v8, v12, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; GISEL-NEXT: v_mul_lo_u32 v10, v11, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v12 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v11, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v9 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, 0, v8, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v2, v8 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 
1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v10, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, 
v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_v2i64: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_mov_b32_e32 v8, v0 +; CGP-NEXT: v_mov_b32_e32 v9, v1 +; CGP-NEXT: v_or_b32_e32 v1, v9, v5 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v9 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v5 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v8, v1 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v4, v0 +; CGP-NEXT: v_addc_u32_e32 v5, vcc, v5, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v10, v10, v1 +; CGP-NEXT: v_xor_b32_e32 v9, v9, v1 +; CGP-NEXT: v_xor_b32_e32 v11, v11, v0 +; CGP-NEXT: v_xor_b32_e32 v0, v5, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v11 +; CGP-NEXT: v_cvt_f32_u32_e32 v12, v0 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v11 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v0, vcc +; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v12 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v12, 0x2f800000, v5 +; CGP-NEXT: v_trunc_f32_e32 v12, v12 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v5 +; 
CGP-NEXT: v_mul_lo_u32 v17, v14, v5 +; CGP-NEXT: v_mul_hi_u32 v18, v13, v5 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 +; CGP-NEXT: v_mul_lo_u32 v17, v12, v16 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v16 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_mul_lo_u32 v18, v5, v15 +; CGP-NEXT: v_mul_lo_u32 v20, v12, v15 +; CGP-NEXT: v_mul_hi_u32 v21, v5, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v12, v15 +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v20, v16 +; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v21 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v18, vcc, v20, v19 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_addc_u32_e64 v16, s[4:5], v12, v15, vcc +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v5 +; CGP-NEXT: v_mul_lo_u32 v14, v14, v5 +; CGP-NEXT: v_mul_hi_u32 v17, v13, v5 +; CGP-NEXT: v_mul_lo_u32 v13, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v16, v13 +; CGP-NEXT: v_mul_hi_u32 v20, v5, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v16, v13 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, 
s[4:5], v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 +; CGP-NEXT: v_add_i32_e64 v16, s[4:5], v17, v18 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v13, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v14 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v12, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v9, v5 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v9, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v9, v12 +; CGP-NEXT: v_mul_hi_u32 v17, v10, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v9, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v5 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v11, v5 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_mul_lo_u32 v12, v11, v12 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v9, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v9, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 
+; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v0, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v0 +; CGP-NEXT: v_cndmask_b32_e32 v9, v13, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v13, vcc, v10, v11 +; CGP-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v11 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v5, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v16, v15, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v13, v11, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v14, v5, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v5, v12, v5, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 +; CGP-NEXT: v_xor_b32_e32 v5, v5, v1 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v5, v1, vcc +; CGP-NEXT: BB2_2: ; %Flow2 +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB2_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 +; 
CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB2_4: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_or_b32_e32 v5, v3, v7 +; CGP-NEXT: v_mov_b32_e32 v4, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB2_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v2, v5 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v6, v4 +; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v8, v8, v5 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v5 +; CGP-NEXT: v_xor_b32_e32 v9, v9, v4 +; CGP-NEXT: v_xor_b32_e32 v4, v7, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v9 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v9 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v10, v10 +; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v7 +; CGP-NEXT: v_mul_hi_u32 v16, v11, v7 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: 
v_mul_lo_u32 v15, v10, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v7, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v14 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v7, v13 +; CGP-NEXT: v_mul_lo_u32 v18, v10, v13 +; CGP-NEXT: v_mul_hi_u32 v19, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], v10, v13, vcc +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v12, v12, v7 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v7 +; CGP-NEXT: v_mul_lo_u32 v11, v11, v14 +; CGP-NEXT: v_mul_lo_u32 v16, v14, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v7, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v14, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: 
v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v16 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v3, v7 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v3, v7 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v10 +; CGP-NEXT: v_mul_lo_u32 v14, v3, v10 +; CGP-NEXT: v_mul_hi_u32 v15, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v3, v10 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v14, v7 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v7 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v9, v7 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v10, v9, v10 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v12 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v7, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v9 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, 
v4, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v4 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v9 +; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v11, v9 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v9, vcc, v11, v9 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v9, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; CGP-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v4, v4, v5 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v5 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v3, v5, vcc +; CGP-NEXT: BB2_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB2_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v6 +; CGP-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v2, v3 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v6 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: BB2_8: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_mov_b32_e32 v2, v4 +; CGP-NEXT: v_mov_b32_e32 v3, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = srem <2 x i64> %num, %den + ret <2 x i64> %result +} + +define i64 @v_srem_i64_pow2k_denom(i64 %num) { +; CHECK-LABEL: v_srem_i64_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s7, 0xfffff000 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: 
v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, s7, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v6, v5 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v6, v6, v7 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v10 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; 
CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, s6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CHECK-NEXT: 
v_subrev_i32_e32 v8, vcc, s6, v5 +; CHECK-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem i64 %num, 4096 + ret i64 %result +} + +define <2 x i64> @v_srem_v2i64_pow2k_denom(<2 x i64> %num) { +; GISEL-LABEL: v_srem_v2i64_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s8, 0x1000 +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: s_mov_b32 s4, 0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; GISEL-NEXT: s_mov_b32 s5, s4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: s_xor_b64 s[10:11], s[6:7], s[4:5] +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, s11 +; GISEL-NEXT: s_sub_u32 s12, 0, s10 +; GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GISEL-NEXT: v_mov_b32_e32 v8, s11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GISEL-NEXT: s_and_b32 s6, s6, 1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; 
GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; GISEL-NEXT: s_subb_u32 s13, 0, s11 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v7 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v11, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v12, s12, v6 +; GISEL-NEXT: s_xor_b64 s[8:9], s[6:7], s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v14, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v15, s9 +; GISEL-NEXT: s_sub_u32 s14, 0, s8 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_mac_f32_e32 v14, 0x4f800000, v15 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v14, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v14, 0x5f7ffffc, v14 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s15, 0, s9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v16, 0x2f800000, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: 
v_add_i32_e32 v12, vcc, v15, v13 +; GISEL-NEXT: v_trunc_f32_e32 v13, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_mac_f32_e32 v14, 0xcf800000, v13 +; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, s15, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, s14, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v7, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, s12, v6 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, s12, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v17, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v16, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, 
s[6:7] +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v19, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_mov_b32_e32 v19, s9 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v17 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v16 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v17 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, s15, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, s14, v11 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v14 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v11, v15 +; GISEL-NEXT: 
v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v17, v1, v7 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_mul_hi_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v17, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v16, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v17, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s11, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, s10, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v15 +; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7 +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v17, v7 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; 
GISEL-NEXT: v_mul_lo_u32 v12, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v13, v2, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, v2, v9 +; GISEL-NEXT: v_mul_lo_u32 v14, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v12, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s11, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s11, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc +; GISEL-NEXT: v_subrev_i32_e32 v15, vcc, s10, v0 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v13 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v15 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s11, v16 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 
+; GISEL-NEXT: v_mul_lo_u32 v11, s8, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, s8, v7 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v15, v13, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, s8, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v3, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v7 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: 
v_cndmask_b32_e32 v2, v2, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_v2i64_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s10, 0x1000 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s8, 0xfffff000 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s10 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_mov_b32_e32 v8, v7 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v8, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v9 +; CGP-NEXT: v_mul_lo_u32 v12, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v15, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; 
CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_mul_lo_u32 v16, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v7, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v9, v15 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v19 +; CGP-NEXT: v_mul_lo_u32 v16, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_mul_lo_u32 v13, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v16, s[4:5], v16, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 
v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], v8, v10, vcc +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v9, v11, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v18, s8, v13 +; CGP-NEXT: v_mul_lo_u32 v19, v13, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v16 +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; CGP-NEXT: v_mul_lo_u32 v17, s8, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v12, v10 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v12, v10 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v11 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v14 +; CGP-NEXT: v_mul_hi_u32 v12, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 +; CGP-NEXT: v_mul_lo_u32 v18, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v7, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v11, s[8:9], v18, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; 
CGP-NEXT: v_add_i32_e64 v10, s[8:9], v10, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v18, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v15, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v17 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v14 +; CGP-NEXT: v_add_i32_e64 v13, s[6:7], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v9, vcc, v9, v13, s[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_mul_lo_u32 v17, v0, v9 +; CGP-NEXT: v_mul_lo_u32 v18, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v19, v0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v1, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v15, v5 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v18, v7 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, 
v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v19 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, s10, v5 +; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, s10, v5 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v13, s10, v7 +; CGP-NEXT: v_mul_lo_u32 v15, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s10, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v8, s10, v8 +; CGP-NEXT: v_mul_lo_u32 v9, s10, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v5 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13 +; CGP-NEXT: v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v7 +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: 
v_cmp_le_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc +; CGP-NEXT: v_subrev_i32_e32 v10, vcc, s10, v2 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s10, v0 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v15, vcc, s10, v10 +; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v18, vcc, s10, v11 +; CGP-NEXT: v_subbrev_u32_e32 v19, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v12, v14, v12, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v15, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v18, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v16, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v19, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v11, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, 
vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = srem <2 x i64> %num, + ret <2 x i64> %result +} + +define i64 @v_srem_i64_oddk_denom(i64 %num) { +; CHECK-LABEL: v_srem_i64_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0 +; CHECK-NEXT: s_mov_b32 s7, 0xffed2705 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s6 +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 +; CHECK-NEXT: v_trunc_f32_e32 v4, v4 +; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v4 +; CHECK-NEXT: v_mul_lo_u32 v6, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6 +; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 +; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11 +; CHECK-NEXT: 
v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CHECK-NEXT: v_addc_u32_e64 v6, s[4:5], v4, v5, vcc +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v5, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, -1, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, s7, v3 +; CHECK-NEXT: v_mul_lo_u32 v9, s7, v6 +; CHECK-NEXT: v_mul_lo_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v6, v5 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v8 +; CHECK-NEXT: v_mul_lo_u32 v8, v3, v7 +; CHECK-NEXT: v_mul_lo_u32 v9, v6, v7 +; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7 +; CHECK-NEXT: v_mul_hi_u32 v6, v6, v7 +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v10, v8 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v9, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v7, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v9, v10 +; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v8, v7 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v6, v7 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc +; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4 +; CHECK-NEXT: v_mul_lo_u32 
v8, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4 +; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3 +; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 +; CHECK-NEXT: v_mul_lo_u32 v6, s6, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, 0, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, s6, v3 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_lo_u32 v4, s6, v4 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], 0, v4 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s6, v0 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v8, vcc, s6, v5 +; CHECK-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; CHECK-NEXT: 
v_cndmask_b32_e32 v5, v5, v8, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = srem i64 %num, 1235195 + ret i64 %result +} + +define <2 x i64> @v_srem_v2i64_oddk_denom(<2 x i64> %num) { +; GISEL-LABEL: v_srem_v2i64_oddk_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; GISEL-NEXT: s_mov_b32 s4, 0 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; GISEL-NEXT: s_mov_b32 s5, s4 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: s_xor_b64 s[10:11], s[6:7], s[4:5] +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, s10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, s11 +; GISEL-NEXT: s_sub_u32 s12, 0, s10 +; GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GISEL-NEXT: v_mov_b32_e32 v8, s11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GISEL-NEXT: s_and_b32 s6, s6, 1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; GISEL-NEXT: s_subb_u32 s13, 0, s11 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6 +; GISEL-NEXT: s_add_u32 s6, s8, 0 +; GISEL-NEXT: s_cselect_b32 s7, 1, 0 
+; GISEL-NEXT: v_trunc_f32_e32 v7, v7 +; GISEL-NEXT: s_and_b32 s7, s7, 1 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v7 +; GISEL-NEXT: s_cmp_lg_u32 s7, 0 +; GISEL-NEXT: s_addc_u32 s7, 0, 0 +; GISEL-NEXT: v_mul_lo_u32 v10, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v11, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v12, s12, v6 +; GISEL-NEXT: s_xor_b64 s[8:9], s[6:7], s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v11, v7, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v6, v10 +; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v14, s8 +; GISEL-NEXT: v_cvt_f32_u32_e32 v15, s9 +; GISEL-NEXT: s_sub_u32 s14, 0, s8 +; GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_mac_f32_e32 v14, 0x4f800000, v15 +; GISEL-NEXT: s_and_b32 s4, s4, 1 +; GISEL-NEXT: v_mul_lo_u32 v12, v6, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v16, v6, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v14, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v14, 0x5f7ffffc, v14 +; GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GISEL-NEXT: s_subb_u32 s15, 0, s9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_mul_f32_e32 v16, 0x2f800000, v14 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v13 +; GISEL-NEXT: v_trunc_f32_e32 v13, v16 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_mac_f32_e32 v14, 0xcf800000, v13 +; GISEL-NEXT: 
v_cvt_u32_f32_e32 v13, v13 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v15, s15, v12 +; GISEL-NEXT: v_mul_hi_u32 v16, s14, v12 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_addc_u32_e64 v10, s[4:5], v7, v9, vcc +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, s12, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s13, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, s12, v6 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, s12, v10 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v10, v9 +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v17, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v16, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v11, s[6:7], v11, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: 
v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v10, v10, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v6, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v19, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_mov_b32_e32 v19, s9 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v14 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v17 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v16 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v17 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_addc_u32_e64 v12, s[6:7], v13, v14, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, s14, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, s15, v11 +; GISEL-NEXT: v_mul_hi_u32 v17, s14, v11 +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v15 +; GISEL-NEXT: v_mul_lo_u32 v15, s14, v12 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v14 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v10, vcc +; GISEL-NEXT: v_mul_hi_u32 v10, v11, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6 +; GISEL-NEXT: v_mul_hi_u32 v16, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_mul_lo_u32 v17, v11, v15 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v17, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, v0, v7 +; GISEL-NEXT: v_mul_lo_u32 v17, v1, v7 +; GISEL-NEXT: 
v_add_i32_e64 v9, s[6:7], v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v16 +; GISEL-NEXT: v_mul_hi_u32 v9, v0, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v16 +; GISEL-NEXT: v_mul_lo_u32 v16, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v15 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v17, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], v16, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v17, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_mul_lo_u32 v10, s10, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, s11, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, s10, v6 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v15 +; GISEL-NEXT: v_mul_lo_u32 v7, s10, v7 +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v13, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v17, v7 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v3, v11 +; GISEL-NEXT: v_mul_hi_u32 v13, v2, v11 +; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v7, v2, v9 +; GISEL-NEXT: 
v_mul_lo_u32 v14, v3, v9 +; GISEL-NEXT: v_mul_hi_u32 v15, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v0 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v12, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v14, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s11, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s11, v10 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v16, v6, vcc +; GISEL-NEXT: v_subrev_i32_e32 v15, vcc, s10, v0 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v1, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v12, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v13 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s11, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s10, v15 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v11, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s11, v16 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, s8, v7 +; GISEL-NEXT: v_mul_lo_u32 v14, s9, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, s8, v7 +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; GISEL-NEXT: v_cndmask_b32_e32 v12, v15, v13, vcc +; 
GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v12, vcc +; GISEL-NEXT: v_mul_lo_u32 v6, s8, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v4 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 +; GISEL-NEXT: v_subb_u32_e64 v4, s[4:5], v3, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2 +; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s9, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v7, v6, vcc +; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2 +; GISEL-NEXT: v_subbrev_u32_e64 v8, s[4:5], 0, v3, vcc +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v19, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s9, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v7 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, s9, v8 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v10, v9, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v5 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: 
v_subb_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_v2i64_oddk_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s10, 0x12d8fb +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0 +; CGP-NEXT: s_mov_b32 s8, 0xffed2705 +; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v3 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_cvt_f32_u32_e32 v7, s10 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_mov_b32_e32 v8, v7 +; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 +; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5 +; CGP-NEXT: v_mac_f32_e32 v8, 0x4f800000, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v8 +; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CGP-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5 +; CGP-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7 +; CGP-NEXT: v_trunc_f32_e32 v8, v8 +; CGP-NEXT: v_trunc_f32_e32 v9, v9 +; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CGP-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v10, s8, v8 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v9 +; CGP-NEXT: v_mul_lo_u32 v12, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v13, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v14, s8, v5 +; CGP-NEXT: v_mul_lo_u32 v15, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_mul_lo_u32 v13, v8, v12 +; CGP-NEXT: v_mul_hi_u32 v18, v5, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v8, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_mul_lo_u32 
v16, v9, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v7, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v9, v15 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v5, v10 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v11 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v19 +; CGP-NEXT: v_mul_lo_u32 v16, v8, v10 +; CGP-NEXT: v_mul_hi_u32 v19, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v8, v10 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_mul_lo_u32 v13, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v9, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v16, s[4:5], v16, v19 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CGP-NEXT: v_addc_u32_e64 v12, s[4:5], v8, v10, vcc +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v8, v10 +; CGP-NEXT: v_mul_lo_u32 v10, s8, 
v5 +; CGP-NEXT: v_mul_lo_u32 v14, -1, v5 +; CGP-NEXT: v_mul_hi_u32 v15, s8, v5 +; CGP-NEXT: v_add_i32_e64 v7, s[4:5], v7, v13 +; CGP-NEXT: v_addc_u32_e64 v13, s[6:7], v9, v11, s[4:5] +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11 +; CGP-NEXT: v_mul_lo_u32 v11, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v16, -1, v7 +; CGP-NEXT: v_mul_hi_u32 v17, s8, v7 +; CGP-NEXT: v_mul_lo_u32 v18, s8, v13 +; CGP-NEXT: v_mul_lo_u32 v19, v13, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v18 +; CGP-NEXT: v_mul_hi_u32 v18, v7, v11 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17 +; CGP-NEXT: v_mul_lo_u32 v17, v7, v16 +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v17, v18 +; CGP-NEXT: v_mul_lo_u32 v17, s8, v12 +; CGP-NEXT: v_mul_lo_u32 v18, v12, v10 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v17 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v12, v10 +; CGP-NEXT: v_mul_hi_u32 v11, v13, v11 +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v14, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v5, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v18, v15 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v5, v14 +; CGP-NEXT: v_mul_hi_u32 v12, v12, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v14, s[8:9], v18, v14 +; CGP-NEXT: v_mul_lo_u32 v18, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v13, v13, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v7, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v15, v10 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v11, s[8:9], v18, v11 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v10, s[8:9], v10, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9] +; CGP-NEXT: v_add_i32_e64 v15, s[8:9], v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], 
v11, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v17, s[6:7], v19, v17 +; CGP-NEXT: v_add_i32_e64 v16, s[6:7], v18, v16 +; CGP-NEXT: v_add_i32_e64 v10, s[6:7], v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v11, s[6:7], v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[6:7] +; CGP-NEXT: v_add_i32_e64 v14, s[6:7], v15, v14 +; CGP-NEXT: v_add_i32_e64 v15, s[6:7], v16, v17 +; CGP-NEXT: v_add_i32_e64 v12, s[6:7], v12, v14 +; CGP-NEXT: v_add_i32_e64 v13, s[6:7], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc +; CGP-NEXT: v_addc_u32_e64 v9, vcc, v9, v13, s[4:5] +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v10, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v12, v2, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v1, v7 +; CGP-NEXT: v_mul_hi_u32 v13, v0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v1, v7 +; CGP-NEXT: v_mul_lo_u32 v14, v2, v8 +; CGP-NEXT: v_mul_lo_u32 v15, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v16, v2, v8 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v8 +; CGP-NEXT: v_mul_lo_u32 v17, v0, v9 +; CGP-NEXT: v_mul_lo_u32 v18, v1, v9 +; CGP-NEXT: v_mul_hi_u32 v19, v0, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v1, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v15, v5 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v18, v7 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, 
vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v19 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v14, v10 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v17, v11 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v18, v13 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CGP-NEXT: v_mul_lo_u32 v12, s10, v5 +; CGP-NEXT: v_mul_lo_u32 v14, 0, v5 +; CGP-NEXT: v_mul_hi_u32 v5, s10, v5 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v13, s10, v7 +; CGP-NEXT: v_mul_lo_u32 v15, 0, v7 +; CGP-NEXT: v_mul_hi_u32 v7, s10, v7 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v8, s10, v8 +; CGP-NEXT: v_mul_lo_u32 v9, s10, v9 +; CGP-NEXT: v_add_i32_e32 v8, vcc, v14, v8 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v15, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 +; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v5 +; CGP-NEXT: v_cmp_le_u32_e64 s[4:5], s10, v2 +; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13 +; CGP-NEXT: v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v7 +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v0 +; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] +; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], 0, v8 +; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[6:7] +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; CGP-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5] +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v5, v10, v5, vcc +; CGP-NEXT: 
v_subrev_i32_e32 v10, vcc, s10, v2 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v10 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v7, vcc +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, s10, v0 +; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s10, v11 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v15, vcc, s10, v10 +; CGP-NEXT: v_subbrev_u32_e32 v16, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_le_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, -1, vcc +; CGP-NEXT: v_subrev_i32_e32 v18, vcc, s10, v11 +; CGP-NEXT: v_subbrev_u32_e32 v19, vcc, 0, v1, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v12, v14, v12, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12 +; CGP-NEXT: v_cndmask_b32_e32 v10, v10, v15, vcc +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13 +; CGP-NEXT: v_cndmask_b32_e64 v11, v11, v18, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v16, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v10, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v19, s[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v11, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v2, v2, v6 +; CGP-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] +; CGP-NEXT: v_xor_b32_e32 v0, v0, v4 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v6 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v1, v4, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v6, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = srem <2 x i64> %num, + ret <2 x 
i64> %result +} + +define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) { +; CHECK-LABEL: v_srem_i64_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s4, 0x1000 +; CHECK-NEXT: s_mov_b32 s5, 0 +; CHECK-NEXT: v_mov_b32_e32 v6, 0 +; CHECK-NEXT: v_lshl_b64 v[4:5], s[4:5], v2 +; CHECK-NEXT: v_or_b32_e32 v7, v1, v5 +; CHECK-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7] +; CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CHECK-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 +; CHECK-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CHECK-NEXT: s_cbranch_execz BB7_2 +; CHECK-NEXT: ; %bb.1: +; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1 +; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v5 +; CHECK-NEXT: v_add_i32_e32 v6, vcc, v0, v3 +; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v7, vcc, v4, v2 +; CHECK-NEXT: v_addc_u32_e32 v5, vcc, v5, v2, vcc +; CHECK-NEXT: v_xor_b32_e32 v6, v6, v3 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 +; CHECK-NEXT: v_xor_b32_e32 v7, v7, v2 +; CHECK-NEXT: v_xor_b32_e32 v2, v5, v2 +; CHECK-NEXT: v_cvt_f32_u32_e32 v5, v7 +; CHECK-NEXT: v_cvt_f32_u32_e32 v8, v2 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v7 +; CHECK-NEXT: v_subb_u32_e32 v10, vcc, 0, v2, vcc +; CHECK-NEXT: v_mac_f32_e32 v5, 0x4f800000, v8 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5 +; CHECK-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5 +; CHECK-NEXT: v_trunc_f32_e32 v8, v8 +; CHECK-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v8, v8 +; CHECK-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v8 +; CHECK-NEXT: v_mul_lo_u32 v12, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v13, v10, v5 +; CHECK-NEXT: v_mul_hi_u32 v14, v9, v5 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CHECK-NEXT: v_mul_lo_u32 v13, v8, v12 +; CHECK-NEXT: v_mul_hi_u32 v15, v5, v12 +; CHECK-NEXT: 
v_mul_hi_u32 v12, v8, v12 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CHECK-NEXT: v_mul_lo_u32 v14, v5, v11 +; CHECK-NEXT: v_mul_lo_u32 v16, v8, v11 +; CHECK-NEXT: v_mul_hi_u32 v17, v5, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v8, v11 +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CHECK-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v16, v12 +; CHECK-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v17 +; CHECK-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v14, vcc, v16, v15 +; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CHECK-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v12 +; CHECK-NEXT: v_addc_u32_e64 v12, s[4:5], v8, v11, vcc +; CHECK-NEXT: v_add_i32_e64 v8, s[4:5], v8, v11 +; CHECK-NEXT: v_mul_lo_u32 v11, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v10, v10, v5 +; CHECK-NEXT: v_mul_hi_u32 v13, v9, v5 +; CHECK-NEXT: v_mul_lo_u32 v9, v9, v12 +; CHECK-NEXT: v_mul_lo_u32 v14, v12, v11 +; CHECK-NEXT: v_mul_hi_u32 v15, v5, v11 +; CHECK-NEXT: v_mul_hi_u32 v11, v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v13 +; CHECK-NEXT: v_mul_lo_u32 v10, v5, v9 +; CHECK-NEXT: v_mul_lo_u32 v13, v12, v9 +; CHECK-NEXT: v_mul_hi_u32 v16, v5, v9 +; CHECK-NEXT: v_mul_hi_u32 v9, v12, v9 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v14, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v13, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v11, v16 +; CHECK-NEXT: v_cndmask_b32_e64 
v14, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; CHECK-NEXT: v_add_i32_e64 v12, s[4:5], v13, v14 +; CHECK-NEXT: v_add_i32_e64 v10, s[4:5], v11, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; CHECK-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CHECK-NEXT: v_add_i32_e64 v9, s[4:5], v9, v11 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; CHECK-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc +; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5 +; CHECK-NEXT: v_mul_hi_u32 v10, v6, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v1, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v6, v8 +; CHECK-NEXT: v_mul_lo_u32 v12, v1, v8 +; CHECK-NEXT: v_mul_hi_u32 v13, v6, v8 +; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8 +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v13 +; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9 +; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v9 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CHECK-NEXT: v_mul_lo_u32 v10, v7, v5 +; CHECK-NEXT: v_mul_lo_u32 v11, v2, v5 +; CHECK-NEXT: v_mul_hi_u32 v5, v7, v5 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; CHECK-NEXT: v_mul_lo_u32 v8, v7, v8 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, v11, v8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v10 +; CHECK-NEXT: v_subb_u32_e64 v8, s[4:5], v1, v5, vcc +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5] +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; 
CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v8, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, v6, v7 +; CHECK-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v1, vcc +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v10, v2 +; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v9, v7 +; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v10, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v12, v11, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v9, v7, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc +; CHECK-NEXT: v_xor_b32_e32 v2, v2, v3 +; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc +; CHECK-NEXT: BB7_2: ; %Flow +; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execz BB7_4 +; CHECK-NEXT: ; %bb.3: +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4 +; CHECK-NEXT: v_sub_i32_e32 
v2, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v2, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v4 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5] +; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: BB7_4: +; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: v_mov_b32_e32 v0, v2 +; CHECK-NEXT: v_mov_b32_e32 v1, v3 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i64 4096, %y + %r = srem i64 %x, %shl.y + ret i64 %r +} + +define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) { +; GISEL-LABEL: v_srem_v2i64_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: s_mov_b32 s5, 0 +; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v4 +; GISEL-NEXT: v_lshl_b64 v[6:7], s[4:5], v6 +; GISEL-NEXT: v_ashrrev_i32_e32 v8, 31, v1 +; GISEL-NEXT: v_ashrrev_i32_e32 v9, 31, v5 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9 +; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_xor_b32_e32 v4, v4, v9 +; GISEL-NEXT: v_xor_b32_e32 v5, v5, v9 +; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v4 +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v5 +; GISEL-NEXT: v_mac_f32_e32 v9, 0x4f800000, v10 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; GISEL-NEXT: v_mul_f32_e32 v9, 0x5f7ffffc, v9 +; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v9 +; GISEL-NEXT: v_trunc_f32_e32 v10, v10 +; GISEL-NEXT: v_mac_f32_e32 v9, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v4 +; GISEL-NEXT: v_subb_u32_e32 v12, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9 
+; GISEL-NEXT: v_mul_lo_u32 v14, v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v10 +; GISEL-NEXT: v_mul_hi_u32 v16, v11, v9 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16 +; GISEL-NEXT: v_mul_lo_u32 v15, v10, v13 +; GISEL-NEXT: v_mul_lo_u32 v16, v9, v14 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v13 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_mul_lo_u32 v16, v10, v14 +; GISEL-NEXT: v_mul_hi_u32 v13, v10, v13 +; GISEL-NEXT: v_mul_hi_u32 v17, v9, v14 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_addc_u32_e64 v13, s[4:5], v10, v14, vcc +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v12, v9 +; GISEL-NEXT: v_mul_lo_u32 v15, v11, v13 +; GISEL-NEXT: v_mul_hi_u32 v11, v11, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v13, v14 +; GISEL-NEXT: v_mul_lo_u32 v15, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v16, v9, v14 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v15, v12 +; GISEL-NEXT: 
v_mul_lo_u32 v15, v13, v11 +; GISEL-NEXT: v_mul_hi_u32 v14, v13, v14 +; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; GISEL-NEXT: v_mul_hi_u32 v11, v13, v11 +; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc +; GISEL-NEXT: v_addc_u32_e64 v10, vcc, 0, v10, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v0, v10 +; GISEL-NEXT: v_mul_hi_u32 v13, v0, v9 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_mul_lo_u32 v12, v1, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9 +; GISEL-NEXT: v_mul_hi_u32 v13, v0, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_mul_lo_u32 v11, v4, v9 +; GISEL-NEXT: v_mul_lo_u32 v12, v5, v9 +; GISEL-NEXT: v_mul_lo_u32 v10, v4, v10 +; GISEL-NEXT: v_mul_hi_u32 v9, v4, v9 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: 
v_sub_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v9, vcc +; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v11, s[4:5], v0, v4 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v12, vcc, 0, v1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v12, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v14, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v11, v4 +; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v11, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 +; GISEL-NEXT: v_xor_b32_e32 v1, v1, v8 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc +; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v3 +; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v7 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v5 +; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v5, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_xor_b32_e32 v6, v6, v5 +; GISEL-NEXT: v_xor_b32_e32 v5, v7, v5 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v6 +; GISEL-NEXT: 
v_cvt_f32_u32_e32 v8, v5 +; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v8 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v7 +; GISEL-NEXT: v_trunc_f32_e32 v8, v8 +; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v8 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; GISEL-NEXT: v_subb_u32_e32 v10, vcc, 0, v5, vcc +; GISEL-NEXT: v_mul_lo_u32 v11, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v12, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v14, v9, v7 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v13, v8, v11 +; GISEL-NEXT: v_mul_lo_u32 v14, v7, v12 +; GISEL-NEXT: v_mul_hi_u32 v15, v7, v11 +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v12 +; GISEL-NEXT: v_mul_hi_u32 v11, v8, v11 +; GISEL-NEXT: v_mul_hi_u32 v15, v7, v12 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], v8, v12, vcc +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v12 +; GISEL-NEXT: v_mul_lo_u32 v12, v9, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 
+; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v9 +; GISEL-NEXT: v_mul_lo_u32 v10, v11, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v7, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v12 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10 +; GISEL-NEXT: v_mul_lo_u32 v13, v11, v9 +; GISEL-NEXT: v_mul_hi_u32 v12, v11, v12 +; GISEL-NEXT: v_mul_hi_u32 v14, v7, v9 +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v12, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v9 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v12 +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v7, v10 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v9, vcc +; GISEL-NEXT: v_addc_u32_e64 v8, vcc, 0, v8, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v2, v8 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_mul_lo_u32 v10, v3, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 +; GISEL-NEXT: v_mul_hi_u32 v11, v2, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, 
v9 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v6, v7 +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v7 +; GISEL-NEXT: v_mul_lo_u32 v8, v6, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v9 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], v2, v6 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc +; GISEL-NEXT: v_subbrev_u32_e64 v10, vcc, 0, v3, s[4:5] +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v12, vcc +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v9, v6 +; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5] +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v9, v6, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc +; GISEL-NEXT: v_xor_b32_e32 v2, v2, v4 +; GISEL-NEXT: v_xor_b32_e32 v3, v3, v4 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: 
v_srem_v2i64_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_mov_b32_e32 v5, v0 +; CGP-NEXT: v_mov_b32_e32 v7, v1 +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: s_mov_b32 s5, 0 +; CGP-NEXT: v_mov_b32_e32 v0, 0 +; CGP-NEXT: v_lshl_b64 v[10:11], s[4:5], v4 +; CGP-NEXT: v_lshl_b64 v[8:9], s[4:5], v6 +; CGP-NEXT: v_or_b32_e32 v1, v7, v11 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_2 +; CGP-NEXT: ; %bb.1: +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v7 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 31, v11 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v1 +; CGP-NEXT: v_addc_u32_e32 v6, vcc, v7, v1, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v0 +; CGP-NEXT: v_addc_u32_e32 v11, vcc, v11, v0, vcc +; CGP-NEXT: v_xor_b32_e32 v4, v4, v1 +; CGP-NEXT: v_xor_b32_e32 v6, v6, v1 +; CGP-NEXT: v_xor_b32_e32 v7, v7, v0 +; CGP-NEXT: v_xor_b32_e32 v0, v11, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v11, v7 +; CGP-NEXT: v_cvt_f32_u32_e32 v12, v0 +; CGP-NEXT: v_sub_i32_e32 v13, vcc, 0, v7 +; CGP-NEXT: v_subb_u32_e32 v14, vcc, 0, v0, vcc +; CGP-NEXT: v_mac_f32_e32 v11, 0x4f800000, v12 +; CGP-NEXT: v_rcp_iflag_f32_e32 v11, v11 +; CGP-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11 +; CGP-NEXT: v_mul_f32_e32 v12, 0x2f800000, v11 +; CGP-NEXT: v_trunc_f32_e32 v12, v12 +; CGP-NEXT: v_mac_f32_e32 v11, 0xcf800000, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v12, v12 +; CGP-NEXT: v_cvt_u32_f32_e32 v11, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v17, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v18, v13, v11 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15 +; CGP-NEXT: v_mul_lo_u32 v17, v12, v16 +; CGP-NEXT: v_mul_hi_u32 v19, v11, v16 +; CGP-NEXT: v_mul_hi_u32 v16, v12, v16 +; CGP-NEXT: 
v_add_i32_e32 v15, vcc, v15, v18 +; CGP-NEXT: v_mul_lo_u32 v18, v11, v15 +; CGP-NEXT: v_mul_lo_u32 v20, v12, v15 +; CGP-NEXT: v_mul_hi_u32 v21, v11, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v12, v15 +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v20, v16 +; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v21 +; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v18, vcc, v20, v19 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16 +; CGP-NEXT: v_addc_u32_e64 v16, s[4:5], v12, v15, vcc +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v15 +; CGP-NEXT: v_mul_lo_u32 v15, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v14, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v17, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v13, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v18, v16, v15 +; CGP-NEXT: v_mul_hi_u32 v19, v11, v15 +; CGP-NEXT: v_mul_hi_u32 v15, v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v17 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v13 +; CGP-NEXT: v_mul_lo_u32 v17, v16, v13 +; CGP-NEXT: v_mul_hi_u32 v20, v11, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v16, v13 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v17, v15 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 +; CGP-NEXT: v_add_i32_e64 v16, 
s[4:5], v17, v18 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v16, v15 +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v15 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, v12, v13, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14 +; CGP-NEXT: v_addc_u32_e32 v12, vcc, 0, v12, vcc +; CGP-NEXT: v_mul_lo_u32 v13, v6, v11 +; CGP-NEXT: v_mul_hi_u32 v14, v4, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v6, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v4, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v6, v12 +; CGP-NEXT: v_mul_hi_u32 v17, v4, v12 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v12 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v17 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13 +; CGP-NEXT: v_mul_lo_u32 v14, v7, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v0, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v7, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v12 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v14 +; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v6, v11, vcc +; CGP-NEXT: v_sub_i32_e64 v6, s[4:5], v6, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v0 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v0, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v0 +; CGP-NEXT: v_cndmask_b32_e32 v11, v13, v11, 
vcc +; CGP-NEXT: v_sub_i32_e32 v13, vcc, v4, v7 +; CGP-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v6, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v13, v7 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v6, vcc, v6, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v13, v7 +; CGP-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v14, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v16, v15, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v13, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v6, v14, v6, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v6, vcc +; CGP-NEXT: v_xor_b32_e32 v0, v0, v1 +; CGP-NEXT: v_xor_b32_e32 v4, v4, v1 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc +; CGP-NEXT: BB8_2: ; %Flow2 +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB8_4 +; CGP-NEXT: ; %bb.3: +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v10 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v1, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v10 +; CGP-NEXT: 
s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_mov_b32_e32 v1, 0 +; CGP-NEXT: BB8_4: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_or_b32_e32 v5, v3, v9 +; CGP-NEXT: v_mov_b32_e32 v4, 0 +; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] +; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, 1 +; CGP-NEXT: s_xor_b64 s[4:5], vcc, s[4:5] +; CGP-NEXT: ; implicit-def: $vgpr4_vgpr5 +; CGP-NEXT: s_and_saveexec_b64 s[6:7], s[4:5] +; CGP-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; CGP-NEXT: s_cbranch_execz BB8_6 +; CGP-NEXT: ; %bb.5: +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v3 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v9 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v2, v5 +; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v4 +; CGP-NEXT: v_addc_u32_e32 v9, vcc, v9, v4, vcc +; CGP-NEXT: v_xor_b32_e32 v6, v6, v5 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v5 +; CGP-NEXT: v_xor_b32_e32 v7, v7, v4 +; CGP-NEXT: v_xor_b32_e32 v4, v9, v4 +; CGP-NEXT: v_cvt_f32_u32_e32 v9, v7 +; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4 +; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7 +; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v4, vcc +; CGP-NEXT: v_mac_f32_e32 v9, 0x4f800000, v10 +; CGP-NEXT: v_rcp_iflag_f32_e32 v9, v9 +; CGP-NEXT: v_mul_f32_e32 v9, 0x5f7ffffc, v9 +; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v9 +; CGP-NEXT: v_trunc_f32_e32 v10, v10 +; CGP-NEXT: v_mac_f32_e32 v9, 0xcf800000, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10 +; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v10 +; CGP-NEXT: v_mul_lo_u32 v14, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v12, v9 +; CGP-NEXT: v_mul_hi_u32 v16, v11, v9 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13 +; CGP-NEXT: v_mul_lo_u32 v15, v10, v14 +; CGP-NEXT: v_mul_hi_u32 v17, v9, v14 +; CGP-NEXT: v_mul_hi_u32 v14, v10, v14 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16 +; CGP-NEXT: v_mul_lo_u32 v16, v9, v13 +; CGP-NEXT: v_mul_lo_u32 v18, v10, v13 +; 
CGP-NEXT: v_mul_hi_u32 v19, v9, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v10, v13 +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14 +; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19 +; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17 +; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v14 +; CGP-NEXT: v_addc_u32_e64 v14, s[4:5], v10, v13, vcc +; CGP-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13 +; CGP-NEXT: v_mul_lo_u32 v13, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v12, v12, v9 +; CGP-NEXT: v_mul_hi_u32 v15, v11, v9 +; CGP-NEXT: v_mul_lo_u32 v11, v11, v14 +; CGP-NEXT: v_mul_lo_u32 v16, v14, v13 +; CGP-NEXT: v_mul_hi_u32 v17, v9, v13 +; CGP-NEXT: v_mul_hi_u32 v13, v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15 +; CGP-NEXT: v_mul_lo_u32 v12, v9, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v14, v11 +; CGP-NEXT: v_mul_hi_u32 v18, v9, v11 +; CGP-NEXT: v_mul_hi_u32 v11, v14, v11 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v16, v12 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v15, v13 +; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v18 +; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12 +; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v15, v16 +; CGP-NEXT: v_add_i32_e64 v12, s[4:5], v13, v12 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5] +; 
CGP-NEXT: v_add_i32_e64 v13, s[4:5], v14, v13 +; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v13 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v11, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; CGP-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc +; CGP-NEXT: v_mul_lo_u32 v11, v3, v9 +; CGP-NEXT: v_mul_hi_u32 v12, v6, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v3, v9 +; CGP-NEXT: v_mul_lo_u32 v13, v6, v10 +; CGP-NEXT: v_mul_lo_u32 v14, v3, v10 +; CGP-NEXT: v_mul_hi_u32 v15, v6, v10 +; CGP-NEXT: v_mul_hi_u32 v10, v3, v10 +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v14, v9 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v15 +; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11 +; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc +; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; CGP-NEXT: v_mul_lo_u32 v12, v7, v9 +; CGP-NEXT: v_mul_lo_u32 v13, v4, v9 +; CGP-NEXT: v_mul_hi_u32 v9, v7, v9 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; CGP-NEXT: v_mul_lo_u32 v10, v7, v10 +; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10 +; CGP-NEXT: v_add_i32_e32 v9, vcc, v10, v9 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v6, v12 +; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v3, v9, vcc +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v7 +; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 +; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v4 +; CGP-NEXT: v_cndmask_b32_e32 v9, v11, v9, vcc +; CGP-NEXT: v_sub_i32_e32 v11, vcc, v6, v7 +; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], 
v11, v7 +; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5] +; CGP-NEXT: v_subb_u32_e32 v3, vcc, v3, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v11, v7 +; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v14, v13, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v4, v11, v7, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v3, vcc +; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc +; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc +; CGP-NEXT: v_xor_b32_e32 v4, v4, v5 +; CGP-NEXT: v_xor_b32_e32 v3, v3, v5 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v5 +; CGP-NEXT: v_subb_u32_e32 v5, vcc, v3, v5, vcc +; CGP-NEXT: BB8_6: ; %Flow +; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_cbranch_execz BB8_8 +; CGP-NEXT: ; %bb.7: +; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 +; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v8 +; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v8 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v8 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] +; CGP-NEXT: v_mov_b32_e32 
v5, 0 +; CGP-NEXT: BB8_8: +; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: v_mov_b32_e32 v2, v4 +; CGP-NEXT: v_mov_b32_e32 v3, v5 +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i64> , %y + %r = srem <2 x i64> %x, %shl.y + ret <2 x i64> %r +} + +define i64 @v_srem_i64_24bit(i64 %num, i64 %den) { +; GISEL-LABEL: v_srem_i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v2 +; CGP-NEXT: v_xor_b32_e32 v2, v0, v1 +; CGP-NEXT: v_cvt_f32_i32_e32 v3, v0 +; CGP-NEXT: v_cvt_f32_i32_e32 v4, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 30, v2 
+; CGP-NEXT: v_rcp_f32_e32 v5, v4 +; CGP-NEXT: v_or_b32_e32 v2, 1, v2 +; CGP-NEXT: v_mul_f32_e32 v5, v3, v5 +; CGP-NEXT: v_trunc_f32_e32 v5, v5 +; CGP-NEXT: v_fma_f32 v3, -v5, v4, v3 +; CGP-NEXT: v_cvt_i32_f32_e32 v5, v5 +; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v4| +; CGP-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CGP-NEXT: v_mul_lo_u32 v1, v2, v1 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and i64 %num, 16777215 + %den.mask = and i64 %den, 16777215 + %result = srem i64 %num.mask, %den.mask + ret i64 %result +} + +define <2 x i64> @v_srem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) { +; GISEL-LABEL: v_srem_v2i64_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s10, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v1, s10, v4 +; GISEL-NEXT: v_and_b32_e32 v3, s10, v6 +; GISEL-NEXT: v_add_i32_e32 v1, vcc, 0, v1 +; GISEL-NEXT: v_addc_u32_e64 v4, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 0, v3 +; GISEL-NEXT: v_addc_u32_e64 v5, s[4:5], 0, 0, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v4 +; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v1 +; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v4, vcc +; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v3 +; GISEL-NEXT: v_subb_u32_e32 v13, vcc, 0, v5, vcc +; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7 +; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v10 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6 +; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7 +; GISEL-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6 +; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7 +; GISEL-NEXT: v_trunc_f32_e32 v10, v10 +; 
GISEL-NEXT: v_trunc_f32_e32 v11, v11 +; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10 +; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v14, v8, v10 +; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, v12, v11 +; GISEL-NEXT: v_mul_lo_u32 v16, v8, v6 +; GISEL-NEXT: v_mul_lo_u32 v17, v9, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v16 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v16 +; GISEL-NEXT: v_mul_hi_u32 v16, v10, v16 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v14 +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; GISEL-NEXT: v_mul_lo_u32 v17, v10, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v19 +; GISEL-NEXT: v_mul_hi_u32 v19, v6, v14 +; GISEL-NEXT: v_mul_hi_u32 v14, v10, v14 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v19 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19 +; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18 +; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v16 +; GISEL-NEXT: v_addc_u32_e64 v16, s[4:5], v10, v14, vcc +; GISEL-NEXT: v_mul_lo_u32 v17, v8, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v6 +; GISEL-NEXT: v_mul_hi_u32 v18, v8, v6 +; GISEL-NEXT: v_mul_lo_u32 v8, v8, v16 +; GISEL-NEXT: v_mul_lo_u32 v19, v16, v17 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v9, v6, v17 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, 
v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v6, v8 +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v19, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v18, v9 +; GISEL-NEXT: v_mul_lo_u32 v9, v12, v7 +; GISEL-NEXT: v_mul_lo_u32 v18, v13, v7 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v18, v15 +; GISEL-NEXT: v_mul_hi_u32 v18, v12, v7 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v20, v7, v15 +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v18, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v7, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v18, v20 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v21, v18 +; GISEL-NEXT: v_mul_hi_u32 v9, v11, v9 +; GISEL-NEXT: v_mul_lo_u32 v20, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v20, v9 +; GISEL-NEXT: v_mul_hi_u32 v20, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v20 +; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v21, v20 +; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7] +; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], v20, v18 +; GISEL-NEXT: v_mul_hi_u32 v15, v11, v15 +; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v7, s[6:7], v7, v9 +; GISEL-NEXT: v_addc_u32_e64 v9, s[8:9], v11, v15, s[6:7] +; GISEL-NEXT: v_mul_lo_u32 v13, v13, v7 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v9 +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v13, v18 +; GISEL-NEXT: v_mul_lo_u32 v18, v12, v7 +; GISEL-NEXT: v_mul_hi_u32 v12, v12, v7 +; GISEL-NEXT: v_add_i32_e64 v12, s[8:9], v13, v12 +; GISEL-NEXT: v_mul_lo_u32 v13, v9, v18 +; GISEL-NEXT: v_mul_lo_u32 v20, v7, v12 +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v13, v20 +; GISEL-NEXT: v_mul_hi_u32 v20, v7, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v21, 
0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v13, s[8:9], v13, v20 +; GISEL-NEXT: v_and_b32_e32 v0, s10, v0 +; GISEL-NEXT: v_and_b32_e32 v2, s10, v2 +; GISEL-NEXT: v_add_i32_e64 v10, s[10:11], v10, v14 +; GISEL-NEXT: v_add_i32_e64 v0, s[10:11], 0, v0 +; GISEL-NEXT: v_addc_u32_e64 v13, s[10:11], 0, 0, s[10:11] +; GISEL-NEXT: v_add_i32_e64 v11, s[10:11], v11, v15 +; GISEL-NEXT: v_mul_hi_u32 v14, v16, v17 +; GISEL-NEXT: v_mul_hi_u32 v15, v9, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v19, v17 +; GISEL-NEXT: v_mul_lo_u32 v18, v16, v8 +; GISEL-NEXT: v_mul_hi_u32 v16, v16, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v6, v8 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v14, v8 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[8:9] +; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v21, v18 +; GISEL-NEXT: v_mul_lo_u32 v19, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v12 +; GISEL-NEXT: v_mul_hi_u32 v12, v7, v12 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v15, v12 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v19, v15 +; GISEL-NEXT: v_add_i32_e64 v2, s[4:5], 0, v2 +; GISEL-NEXT: v_addc_u32_e64 v19, s[4:5], 0, 0, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v8, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v17 +; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18 +; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14 +; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v15 +; GISEL-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc +; GISEL-NEXT: v_addc_u32_e64 v9, vcc, 
v11, v9, s[6:7] +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc +; GISEL-NEXT: v_mul_lo_u32 v10, v13, v6 +; GISEL-NEXT: v_mul_hi_u32 v11, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12 +; GISEL-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; GISEL-NEXT: v_mul_lo_u32 v12, v19, v7 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v19, v7 +; GISEL-NEXT: v_mul_lo_u32 v15, v0, v8 +; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8 +; GISEL-NEXT: v_mul_hi_u32 v17, v0, v8 +; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8 +; GISEL-NEXT: v_mul_lo_u32 v18, v2, v9 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v18 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v12, v19, v9 +; GISEL-NEXT: v_mul_hi_u32 v14, v2, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v19, v9 +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v16, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v12, v7 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5] +; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v17 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v14 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11 +; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v17 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10 +; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v15 +; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc +; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 +; 
GISEL-NEXT: v_mul_lo_u32 v11, v1, v6 +; GISEL-NEXT: v_mul_lo_u32 v15, v4, v6 +; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6 +; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14 +; GISEL-NEXT: v_mul_lo_u32 v14, v3, v7 +; GISEL-NEXT: v_mul_lo_u32 v16, v5, v7 +; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12 +; GISEL-NEXT: v_mul_lo_u32 v8, v1, v8 +; GISEL-NEXT: v_mul_lo_u32 v9, v3, v9 +; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v11 +; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v13, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v13, v6 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v14 +; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v19, v7, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v19, v7 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[6:7] +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7] +; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v6, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc +; GISEL-NEXT: v_subb_u32_e64 v7, vcc, v7, v5, s[4:5] +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v8, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v1 +; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v6, v4, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v11, v13, v11, vcc +; GISEL-NEXT: v_sub_i32_e32 v13, vcc, v2, v3 +; GISEL-NEXT: v_subbrev_u32_e64 v16, s[4:5], 0, v7, vcc +; GISEL-NEXT: 
v_cmp_ge_u32_e64 s[4:5], v13, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[4:5] +; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v14, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v1 +; GISEL-NEXT: v_subbrev_u32_e32 v6, vcc, 0, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v13, v3 +; GISEL-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v7, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v14, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v18, v15, vcc +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v16, v5 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v19, v17, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v1, vcc +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v3, v13, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v4, v14, v6, vcc +; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v16, v7, s[4:5] +; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v3, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v4, v10, v1, s[4:5] +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, 0, v0 +; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v3, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, 0, v2 +; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v4, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_srem_v2i64_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v0, s4, v0 +; CGP-NEXT: v_and_b32_e32 v1, s4, v2 +; CGP-NEXT: v_and_b32_e32 v2, s4, v4 +; CGP-NEXT: v_and_b32_e32 v3, s4, v6 +; CGP-NEXT: v_xor_b32_e32 v4, v0, v2 +; CGP-NEXT: v_cvt_f32_i32_e32 v5, v0 +; CGP-NEXT: v_cvt_f32_i32_e32 v6, v2 +; CGP-NEXT: v_xor_b32_e32 v7, v1, v3 +; CGP-NEXT: 
v_cvt_f32_i32_e32 v8, v1 +; CGP-NEXT: v_cvt_f32_i32_e32 v9, v3 +; CGP-NEXT: v_ashrrev_i32_e32 v4, 30, v4 +; CGP-NEXT: v_rcp_f32_e32 v10, v6 +; CGP-NEXT: v_ashrrev_i32_e32 v7, 30, v7 +; CGP-NEXT: v_rcp_f32_e32 v11, v9 +; CGP-NEXT: v_or_b32_e32 v4, 1, v4 +; CGP-NEXT: v_mul_f32_e32 v10, v5, v10 +; CGP-NEXT: v_or_b32_e32 v7, 1, v7 +; CGP-NEXT: v_mul_f32_e32 v11, v8, v11 +; CGP-NEXT: v_trunc_f32_e32 v10, v10 +; CGP-NEXT: v_trunc_f32_e32 v11, v11 +; CGP-NEXT: v_fma_f32 v5, -v10, v6, v5 +; CGP-NEXT: v_cvt_i32_f32_e32 v10, v10 +; CGP-NEXT: v_fma_f32 v8, -v11, v9, v8 +; CGP-NEXT: v_cvt_i32_f32_e32 v11, v11 +; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v6| +; CGP-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; CGP-NEXT: v_cmp_ge_f32_e64 vcc, |v8|, |v9| +; CGP-NEXT: v_cndmask_b32_e32 v5, 0, v7, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5 +; CGP-NEXT: v_mul_lo_u32 v2, v4, v2 +; CGP-NEXT: v_mul_lo_u32 v3, v5, v3 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_lshlrev_b32_e32 v0, 7, v0 +; CGP-NEXT: v_lshlrev_b32_e32 v1, 7, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v0, 7, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v2, 7, v1 +; CGP-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v2 +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215> + %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215> + %result = srem <2 x i64> %num.mask, %den.mask + ret <2 x i64> %result +}