diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -2523,104 +2523,46 @@ return false; } -static Register buildDivRCP(MachineIRBuilder &B, Register Src) { - const LLT S32 = LLT::scalar(32); - - auto Cvt0 = B.buildUITOFP(S32, Src); - auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Cvt0}); - auto FPUIntMaxPlus1 = B.buildFConstant(S32, BitsToFloat(0x4f800000)); - auto Mul = B.buildFMul(S32, RcpIFlag, FPUIntMaxPlus1); - return B.buildFPTOUI(S32, Mul).getReg(0); -} - void AMDGPULegalizerInfo::legalizeUDIV_UREM32Impl(MachineIRBuilder &B, Register DstReg, - Register Num, - Register Den, + Register X, + Register Y, bool IsDiv) const { const LLT S1 = LLT::scalar(1); const LLT S32 = LLT::scalar(32); - // RCP = URECIP(Den) = 2^32 / Den + e - // e is rounding error. - auto RCP = buildDivRCP(B, Den); - - // RCP_LO = mul(RCP, Den) - auto RCP_LO = B.buildMul(S32, RCP, Den); - - // RCP_HI = mulhu (RCP, Den) */ - auto RCP_HI = B.buildUMulH(S32, RCP, Den); - - // NEG_RCP_LO = -RCP_LO - auto Zero = B.buildConstant(S32, 0); - auto NEG_RCP_LO = B.buildSub(S32, Zero, RCP_LO); - - // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO) - auto CmpRcpHiZero = B.buildICmp(CmpInst::ICMP_EQ, S1, RCP_HI, Zero); - auto ABS_RCP_LO = B.buildSelect(S32, CmpRcpHiZero, NEG_RCP_LO, RCP_LO); - - // Calculate the rounding error from the URECIP instruction - // E = mulhu(ABS_RCP_LO, RCP) - auto E = B.buildUMulH(S32, ABS_RCP_LO, RCP); - - // RCP_A_E = RCP + E - auto RCP_A_E = B.buildAdd(S32, RCP, E); - - // RCP_S_E = RCP - E - auto RCP_S_E = B.buildSub(S32, RCP, E); - - // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) - auto Tmp0 = B.buildSelect(S32, CmpRcpHiZero, RCP_A_E, RCP_S_E); - - // Quotient = mulhu(Tmp0, Num)stmp - auto Quotient = B.buildUMulH(S32, Tmp0, Num); - - // Num_S_Remainder = Quotient * Den - auto Num_S_Remainder = B.buildMul(S32, Quotient, Den); + // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the + // algorithm used here. - // Remainder = Num - Num_S_Remainder - auto Remainder = B.buildSub(S32, Num, Num_S_Remainder); + // Initial estimate of inv(y). + auto FloatY = B.buildUITOFP(S32, Y); + auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY}); + auto Scale = B.buildFConstant(S32, BitsToFloat(0x4f7ffffe)); + auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale); + auto Z = B.buildFPTOUI(S32, ScaledY); - // Remainder_GE_Den = Remainder >= Den - auto Remainder_GE_Den = B.buildICmp(CmpInst::ICMP_UGE, S1, Remainder, Den); + // One round of UNR. + auto NegY = B.buildSub(S32, B.buildConstant(S32, 0), Y); + auto NegYZ = B.buildMul(S32, NegY, Z); + Z = B.buildAdd(S32, Z, B.buildUMulH(S32, Z, NegYZ)); - // Remainder_GE_Zero = Num >= Num_S_Remainder; - auto Remainder_GE_Zero = B.buildICmp(CmpInst::ICMP_UGE, S1, - Num, Num_S_Remainder); + // Quotient/remainder estimate. + auto Q = B.buildUMulH(S32, X, Z); + auto R = B.buildSub(S32, X, B.buildMul(S32, Q, Y)); - // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero - auto Tmp1 = B.buildAnd(S1, Remainder_GE_Den, Remainder_GE_Zero); - - // Calculate Division result: - - // Quotient_A_One = Quotient + 1 + // First quotient/remainder refinement. auto One = B.buildConstant(S32, 1); - auto Quotient_A_One = B.buildAdd(S32, Quotient, One); - - // Quotient_S_One = Quotient - 1 - auto Quotient_S_One = B.buildSub(S32, Quotient, One); - - // Div = (Tmp1 ? Quotient_A_One : Quotient) - auto Div = B.buildSelect(S32, Tmp1, Quotient_A_One, Quotient); - - // Div = (Remainder_GE_Zero ? Div : Quotient_S_One) - if (IsDiv) { - B.buildSelect(DstReg, Remainder_GE_Zero, Div, Quotient_S_One); - } else { - Div = B.buildSelect(S32, Remainder_GE_Zero, Div, Quotient_S_One); - - // Calculate Rem result: - auto Remainder_S_Den = B.buildSub(S32, Remainder, Den); - - // Remainder_A_Den = Remainder + Den - auto Remainder_A_Den = B.buildAdd(S32, Remainder, Den); - - // Rem = (Tmp1 ? Remainder_S_Den : Remainder) - auto Rem = B.buildSelect(S32, Tmp1, Remainder_S_Den, Remainder); + auto Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y); + if (IsDiv) + Q = B.buildSelect(S32, Cond, B.buildAdd(S32, Q, One), Q); + R = B.buildSelect(S32, Cond, B.buildSub(S32, R, Y), R); - // Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den) - B.buildSelect(DstReg, Remainder_GE_Zero, Rem, Remainder_A_Den); - } + // Second quotient/remainder refinement. + Cond = B.buildICmp(CmpInst::ICMP_UGE, S1, R, Y); + if (IsDiv) + B.buildSelect(DstReg, Cond, B.buildAdd(S32, Q, One), Q); + else + B.buildSelect(DstReg, Cond, B.buildSub(S32, R, Y), R); } bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -21,34 +21,30 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6: $vgpr0 = COPY [[SUB4]](s32) + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: $vgpr0 = COPY [[SUB3]](s32) ; GFX8-LABEL: name: test_sdiv_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -61,34 +57,30 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8: $vgpr0 = COPY [[SUB4]](s32) + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: $vgpr0 = COPY [[SUB3]](s32) ; GFX9-LABEL: name: test_sdiv_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -101,34 +93,30 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9: $vgpr0 = COPY [[SUB4]](s32) + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: $vgpr0 = COPY [[SUB3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SDIV %0, %1 @@ -155,67 +143,59 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] - ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] ; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) + ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_sdiv_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -231,67 +211,59 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] ; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) + ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_sdiv_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -307,67 +279,59 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C3]] - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C3]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR5]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C3]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] ; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) + ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB3]](s32), [[SUB7]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -1981,34 +1945,30 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_sdiv_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2026,34 +1986,30 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_sdiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2071,34 +2027,30 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -2136,77 +2088,69 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] - ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX6: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX6: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] ; GFX6: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX6: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB7]](s32) + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_sdiv_v2s16 @@ -2230,77 +2174,69 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX8: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX8: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] ; GFX8: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX8: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C6]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB7]](s32) + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C6]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_sdiv_v2s16 @@ -2324,72 +2260,64 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[ADD6]], [[ASHR3]] ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR5]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR5]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR5]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR4]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR5]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR5]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR4]](s32), [[MUL3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD7]], [[UMULH5]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB8]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR5]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR4]], [[ADD7]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR5]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[XOR4]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[XOR5]] + ; GFX9: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD8]], [[UMULH3]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[XOR5]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[XOR5]] + ; GFX9: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD9]], [[SELECT3]] ; GFX9: [[XOR6:%[0-9]+]]:_(s32) = G_XOR [[ASHR2]], [[ASHR3]] - ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[XOR6]] - ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) + ; GFX9: [[XOR7:%[0-9]+]]:_(s32) = G_XOR [[SELECT5]], [[XOR6]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR7]], [[XOR6]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB7]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -2420,34 +2348,30 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_sdiv_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2465,34 +2389,30 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_sdiv_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2510,34 +2430,30 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -2570,34 +2486,30 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_sdiv_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2615,34 +2527,30 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_sdiv_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2660,34 +2568,30 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD3]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD3]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[XOR1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD4]], [[SELECT]] ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[ASHR]], [[ASHR1]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[XOR2]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[SELECT2]], [[XOR2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[XOR2]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -21,30 +21,24 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6: $vgpr0 = COPY [[SUB4]](s32) ; GFX8-LABEL: name: test_srem_s32 @@ -59,30 +53,24 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8: $vgpr0 = COPY [[SUB4]](s32) ; GFX9-LABEL: name: test_srem_s32 @@ -97,30 +85,24 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9: $vgpr0 = COPY [[SUB4]](s32) %0:_(s32) = COPY $vgpr0 @@ -149,62 +131,50 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] - ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] - ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] - ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -222,62 +192,50 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] - ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -295,62 +253,50 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[UV1]], [[C]](s32) ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[UV3]], [[C]](s32) - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[ASHR2]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[ASHR3]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C2]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] - ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR4]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SUB4]](s32), [[SUB9]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) @@ -1897,35 +1843,29 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX6: $vgpr0 = COPY [[AND1]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX6: $vgpr0 = COPY [[AND]](s32) ; GFX8-LABEL: name: test_srem_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1942,35 +1882,29 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX8: $vgpr0 = COPY [[AND1]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX8: $vgpr0 = COPY [[AND]](s32) ; GFX9-LABEL: name: test_srem_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1987,35 +1921,29 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] - ; GFX9: $vgpr0 = COPY [[AND1]](s32) + ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C3]] + ; GFX9: $vgpr0 = COPY [[AND]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -2052,30 +1980,24 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 @@ -2083,43 +2005,37 @@ ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; GFX6: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) ; GFX6: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX6: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX6: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] - ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] - ; GFX6: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] - ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX6: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX6: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] ; GFX6: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_srem_v2s16 @@ -2143,30 +2059,24 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 @@ -2174,43 +2084,37 @@ ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; GFX8: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) ; GFX8: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX8: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX8: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] - ; GFX8: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] - ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX8: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX8: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] ; GFX8: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] + ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C5]] ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL]] + ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C5]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_srem_v2s16 @@ -2234,30 +2138,24 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 @@ -2265,35 +2163,29 @@ ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 ; GFX9: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG2]], [[C1]](s32) ; GFX9: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[SEXT_INREG3]], [[C1]](s32) - ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] - ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] - ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR2]] - ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD5]], [[ASHR3]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG2]], [[ASHR2]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[SEXT_INREG3]], [[ASHR3]] + ; GFX9: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[ADD3]], [[ASHR2]] + ; GFX9: [[XOR4:%[0-9]+]]:_(s32) = G_XOR [[ADD4]], [[ASHR3]] ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[XOR4]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[XOR4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[XOR4]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB5]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD6]], [[SUB6]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[XOR3]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[XOR4]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB7]](s32), [[XOR4]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR3]](s32), [[MUL3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB7]], [[XOR4]] - ; GFX9: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[SUB7]], [[XOR4]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB8]], [[SUB7]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD7]] - ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT7]], [[ASHR2]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[XOR4]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB5]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[XOR3]], [[ADD5]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[XOR4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[XOR3]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[XOR4]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[XOR4]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB7]], [[SUB6]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[XOR4]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[XOR4]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB8]], [[SELECT2]] + ; GFX9: [[XOR5:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR2]] ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[XOR5]], [[ASHR2]] ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SUB9]](s32) @@ -2327,30 +2219,24 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) @@ -2370,30 +2256,24 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) @@ -2413,30 +2293,24 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) @@ -2471,30 +2345,24 @@ ; GFX6: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) @@ -2514,30 +2382,24 @@ ; GFX8: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) @@ -2557,30 +2419,24 @@ ; GFX9: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[ADD1]], [[ASHR1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[XOR1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[XOR1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[XOR1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD2]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[XOR]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[XOR1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[XOR1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[XOR]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[XOR1]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[XOR1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD3]] - ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT3]], [[ASHR]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[XOR1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[XOR]], [[ADD2]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[XOR1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[XOR]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[XOR1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[XOR1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[XOR1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[XOR1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[SELECT1]], [[ASHR]] ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[XOR2]], [[ASHR]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SUB4]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -14,91 +14,79 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX6: $vgpr0 = COPY [[SELECT3]](s32) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX6: $vgpr0 = COPY [[SELECT2]](s32) ; GFX8-LABEL: name: test_udiv_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX8: $vgpr0 = COPY [[SELECT3]](s32) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX8: $vgpr0 = COPY [[SELECT2]](s32) ; GFX9-LABEL: name: test_udiv_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX9: $vgpr0 = COPY [[SELECT3]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[COPY1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX9: $vgpr0 = COPY [[SELECT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UDIV %0, %1 @@ -118,55 +106,47 @@ ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] - ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD3]], [[UMULH5]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_udiv_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -175,55 +155,47 @@ ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD3]], [[UMULH5]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_udiv_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -232,55 +204,47 @@ ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[ADD1]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[UV2]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C2]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[ADD3]], [[UMULH5]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD3]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[UV3]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[UV3]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[UV3]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C2]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT2]](s32), [[SELECT5]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -1693,33 +1657,29 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX6: $vgpr0 = COPY [[AND3]](s32) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX6: $vgpr0 = COPY [[AND2]](s32) ; GFX8-LABEL: name: test_udiv_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1730,33 +1690,29 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND3]](s32) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX8: $vgpr0 = COPY [[AND2]](s32) ; GFX9-LABEL: name: test_udiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1767,33 +1723,29 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND3]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX9: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1824,64 +1776,56 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] - ; GFX6: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[ADD3]], [[UMULH5]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_udiv_v2s16 @@ -1899,64 +1843,56 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] - ; GFX8: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[ADD3]], [[UMULH5]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_udiv_v2s16 @@ -1974,60 +1910,52 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C4]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C4]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] - ; GFX9: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[ADD3]], [[UMULH5]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB3]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD3]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB4]](s32), [[AND3]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UMULH3]], [[C4]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[ADD4]], [[UMULH3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[SUB4]], [[AND3]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB5]], [[SUB4]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT4]](s32), [[AND3]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[SELECT3]], [[C4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD5]], [[SELECT3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT5]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -2052,31 +1980,27 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_udiv_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2088,31 +2012,27 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_udiv_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2124,31 +2044,27 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -2175,31 +2091,27 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_udiv_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2211,31 +2123,27 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_udiv_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2247,31 +2155,27 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[ADD1]], [[UMULH2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH1]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD1]], [[UMULH1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT1]](s32), [[AND1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[ADD2]], [[SELECT]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT2]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -14,88 +14,70 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX6: $vgpr0 = COPY [[SELECT3]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: $vgpr0 = COPY [[SELECT1]](s32) ; GFX8-LABEL: name: test_urem_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX8: $vgpr0 = COPY [[SELECT3]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: $vgpr0 = COPY [[SELECT1]](s32) ; GFX9-LABEL: name: test_urem_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX9: $vgpr0 = COPY [[SELECT3]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[COPY1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[COPY]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[COPY1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[COPY1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[COPY1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[COPY1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[COPY1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: $vgpr0 = COPY [[SELECT1]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UREM %0, %1 @@ -115,54 +97,42 @@ ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] - ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] - ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_urem_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -171,54 +141,42 @@ ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] - ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] - ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_urem_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 @@ -227,54 +185,42 @@ ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] - ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV2]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[UV2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[UV2]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[UV2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[UV2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[UV2]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] - ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[UV3]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[ADD1]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[UV3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[UV3]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[UV3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[UV3]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[UV3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT1]](s32), [[SELECT3]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -1627,32 +1573,26 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX6: $vgpr0 = COPY [[AND3]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX6: $vgpr0 = COPY [[AND2]](s32) ; GFX8-LABEL: name: test_urem_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1663,32 +1603,26 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND3]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX8: $vgpr0 = COPY [[AND2]](s32) ; GFX9-LABEL: name: test_urem_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -1699,32 +1633,26 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND3]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX9: $vgpr0 = COPY [[AND2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -1755,63 +1683,51 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] - ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] - ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] - ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] - ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] - ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] - ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] - ; GFX6: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]] - ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]] - ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]] - ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) - ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_urem_v2s16 @@ -1829,63 +1745,51 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] - ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] - ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] - ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] - ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] - ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] - ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] - ; GFX8: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]] - ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]] - ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]] - ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) - ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_urem_v2s16 @@ -1903,59 +1807,47 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND3]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) - ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) - ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] - ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] - ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] - ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] - ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] - ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] - ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] - ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] - ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] - ; GFX9: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]] - ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]] - ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]] - ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[AND3]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[SUB4]], [[FPTOUI1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[MUL2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH2]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[AND2]], [[ADD1]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH3]], [[AND3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[AND2]], [[MUL3]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB5]](s32), [[AND3]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB5]], [[AND3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SUB6]], [[SUB5]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT2]](s32), [[AND3]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SELECT2]], [[AND3]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB7]], [[SELECT2]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -1980,30 +1872,24 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_urem_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2015,30 +1901,24 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_urem_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2050,30 +1930,24 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -2100,30 +1974,24 @@ ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_urem_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2135,30 +2003,24 @@ ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_urem_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -2170,30 +2032,24 @@ ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41EFFFFFC0000000 ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) - ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] - ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] - ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] - ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] - ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] - ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] - ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] - ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] - ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] - ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] - ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] - ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[AND1]] + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[SUB]], [[FPTOUI]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[MUL]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[AND]], [[ADD]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH1]], [[AND1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB1]](s32), [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[AND1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB2]], [[SUB1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SELECT]](s32), [[AND1]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SELECT]], [[AND1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[SUB3]], [[SELECT]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll @@ -15,28 +15,24 @@ ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1 -; GISEL-NEXT: v_mul_hi_u32 v6, v4, v1 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v4, v5 -; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 ; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 -; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v6, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc ; GISEL-NEXT: v_xor_b32_e32 v1, v2, v3 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 @@ -96,31 +92,27 @@ ; GISEL-NEXT: s_ashr_i32 s3, s1, 31 ; GISEL-NEXT: s_add_i32 s0, s0, s2 ; GISEL-NEXT: s_add_i32 s1, s1, s3 -; GISEL-NEXT: s_xor_b32 s4, s0, s2 -; GISEL-NEXT: s_xor_b32 s5, s1, s3 -; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s5 +; GISEL-NEXT: s_xor_b32 s0, s0, s2 +; GISEL-NEXT: s_xor_b32 s4, s1, s3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s4 +; GISEL-NEXT: s_sub_i32 s1, 0, s4 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_lo_u32 v1, v0, s5 -; GISEL-NEXT: v_mul_hi_u32 v2, v0, s5 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0 -; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1 -; GISEL-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1 +; GISEL-NEXT: v_mul_lo_u32 v1, s1, v0 +; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0 +; GISEL-NEXT: v_mul_lo_u32 v1, v0, s4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v0, s4 -; GISEL-NEXT: v_mul_lo_u32 v1, v0, s5 +; GISEL-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, s4, v1 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, s4, v1 -; GISEL-NEXT: v_cmp_le_u32_e64 s[0:1], s5, v4 -; GISEL-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GISEL-NEXT: s_xor_b32 s0, s2, s3 ; GISEL-NEXT: v_xor_b32_e32 v0, s0, v0 ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0 @@ -192,51 +184,43 @@ ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v10, v13, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v12, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 @@ -329,31 +313,27 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_movk_i32 s6, 0x1000 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, s6 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v6 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_subrev_i32_e64 v3, s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -370,52 +350,43 @@ ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8 +; GISEL-NEXT: s_sub_i32 s4, 0, s8 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, s8 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, s8 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, s4, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, s4, v4 +; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4 ; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, v4, s8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v4 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v12 -; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], s8, v0 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GISEL-NEXT: v_subrev_i32_e64 v6, s[4:5], s8, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v7, s[6:7], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -428,61 +399,53 @@ ; CGP-NEXT: s_movk_i32 s4, 0x1000 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; CGP-NEXT: v_mov_b32_e32 v3, 0x1000 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: s_mov_b32 s5, 0xfffff000 +; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s4 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v5, s4 -; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v6, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v7, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v8, v6 -; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7 -; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8 -; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v5, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v6, v6, v1 -; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v5 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v5 -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v5 -; CGP-NEXT: v_lshlrev_b32_e32 v10, 12, v6 -; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v6 -; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 1, v6 -; CGP-NEXT: v_sub_i32_e32 v13, vcc, v0, v7 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v8, s[6:7] -; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v6, v11, s[6:7] -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5] +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s5, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 +; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v6 +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6 +; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v4 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6 +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5] +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i32> %num, ret <2 x i32> %result @@ -494,31 +457,27 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 -; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, s6 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v6 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_subrev_i32_e64 v3, s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -535,52 +494,43 @@ ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8 +; GISEL-NEXT: s_sub_i32 s4, 0, s8 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, s8 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, s8 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, s4, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, s4, v4 +; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4 ; GISEL-NEXT: v_mul_lo_u32 v6, v5, s8 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v5 -; GISEL-NEXT: v_mul_lo_u32 v9, v4, s8 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v4 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v12 -; GISEL-NEXT: v_cmp_le_u32_e64 s[8:9], s8, v0 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v4, v10, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v4, s8 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GISEL-NEXT: v_subrev_i32_e64 v6, s[4:5], s8, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v9, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v7, s[6:7], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v5 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -590,64 +540,56 @@ ; CGP-LABEL: v_sdiv_v2i32_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s8, 0x12d8fb +; CGP-NEXT: s_mov_b32 s4, 0x12d8fb ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: s_mov_b32 s5, 0xffed2705 +; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s8 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mul_lo_u32 v7, v5, s8 -; CGP-NEXT: v_mul_hi_u32 v8, v5, s8 -; CGP-NEXT: v_mul_lo_u32 v9, v6, v3 -; CGP-NEXT: v_mul_hi_u32 v10, v6, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v7, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v8, v6 -; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7 -; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8 -; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v5, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v6, v6, v1 -; CGP-NEXT: v_mul_lo_u32 v7, v5, s8 -; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v5 -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v5 -; CGP-NEXT: v_mul_lo_u32 v10, v6, v3 -; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v6 -; CGP-NEXT: v_subrev_i32_e32 v12, vcc, 1, v6 -; CGP-NEXT: v_sub_i32_e32 v13, vcc, v0, v7 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v10 -; CGP-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v13 -; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v8, s[6:7] -; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v6, v11, s[6:7] -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5] +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s5, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v6, s4 +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v6 +; CGP-NEXT: v_mul_lo_u32 v9, v4, v3 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v7 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v9 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc +; CGP-NEXT: v_subrev_i32_e64 v7, s[4:5], s4, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v10, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v8, s[6:7], v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v6 +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v8, s[4:5] +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] %result = sdiv <2 x i32> %num, ret <2 x i32> %result @@ -665,28 +607,24 @@ ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v1 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v4, v1 -; CHECK-NEXT: v_mul_hi_u32 v6, v4, v1 -; CHECK-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; CHECK-NEXT: v_mul_hi_u32 v5, v5, v4 -; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], v4, v5 -; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v0 +; CHECK-NEXT: v_mul_lo_u32 v5, v5, v4 +; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; CHECK-NEXT: v_mul_hi_u32 v4, v0, v4 ; CHECK-NEXT: v_mul_lo_u32 v5, v4, v1 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v4 -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 1, v4 -; CHECK-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v6, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc ; CHECK-NEXT: v_xor_b32_e32 v1, v2, v3 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 @@ -718,51 +656,43 @@ ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2 -; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6 -; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8 -; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 -; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 ; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v6 -; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v6 -; GISEL-NEXT: v_mul_lo_u32 v11, v7, v3 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v7 -; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v7 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v8 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v6, v9, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v7, v12, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v7 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc +; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc +; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v6 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v7 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v8, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v9, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 @@ -867,28 +797,24 @@ ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1 -; GISEL-NEXT: v_mul_hi_u32 v6, v4, v1 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4 -; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v4, v5 -; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 ; GISEL-NEXT: v_mul_lo_u32 v5, v4, v1 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v1 -; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v6, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; GISEL-NEXT: v_sub_i32_e64 v5, s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v4 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc ; GISEL-NEXT: v_xor_b32_e32 v1, v2, v3 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v1 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 @@ -958,51 +884,43 @@ ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v12, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v10, v13, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 -; GISEL-NEXT: v_mul_lo_u32 v11, v5, v3 -; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, 1, v5 -; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v11 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v11 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v14, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v12, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v10, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v10 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v11, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v8 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v9 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll @@ -166,29 +166,25 @@ ; CHECK-NEXT: s_cbranch_execz BB0_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 +; CHECK-NEXT: v_mov_b32_e32 v5, 0 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc -; CHECK-NEXT: v_mov_b32_e32 v5, 0 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc ; CHECK-NEXT: BB0_4: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] ; CHECK-NEXT: v_mov_b32_e32 v0, v4 @@ -369,28 +365,24 @@ ; CHECK-NEXT: s_cbranch_scc0 BB1_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4 +; CHECK-NEXT: s_sub_i32 s0, 0, s4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, s0, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4 -; CHECK-NEXT: v_mul_hi_u32 v2, v0, s4 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 -; CHECK-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1 -; CHECK-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_mul_hi_u32 v0, v0, s2 -; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4 +; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s4, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s2, v1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, s2, v1 -; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s4, v4 -; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: BB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 @@ -860,28 +852,24 @@ ; CGP-NEXT: s_cbranch_execz BB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v1, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v0 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v1 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB2_4: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -1043,28 +1031,24 @@ ; CGP-NEXT: s_cbranch_execz BB2_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v2, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc ; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: BB2_8: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -2686,28 +2670,24 @@ ; CHECK-NEXT: s_cbranch_execz BB7_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 -; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc ; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: BB7_4: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -3180,28 +3160,24 @@ ; CGP-NEXT: s_cbranch_execz BB8_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v5, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 -; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v10 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 -; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v0 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v1 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v10 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB8_4: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -3363,28 +3339,24 @@ ; CGP-NEXT: s_cbranch_execz BB8_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 -; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc ; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: BB8_8: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -3403,29 +3375,25 @@ ; GISEL-NEXT: s_mov_b32 s4, 0xffffff ; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll @@ -15,28 +15,22 @@ ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: s_setpc_b64 s[30:31] @@ -88,37 +82,31 @@ define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) { ; GISEL-LABEL: s_srem_i32: ; GISEL: ; %bb.0: -; GISEL-NEXT: s_ashr_i32 s4, s0, 31 -; GISEL-NEXT: s_ashr_i32 s2, s1, 31 -; GISEL-NEXT: s_add_i32 s0, s0, s4 -; GISEL-NEXT: s_add_i32 s1, s1, s2 -; GISEL-NEXT: s_xor_b32 s3, s0, s4 -; GISEL-NEXT: s_xor_b32 s2, s1, s2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s2 +; GISEL-NEXT: s_ashr_i32 s2, s0, 31 +; GISEL-NEXT: s_ashr_i32 s3, s1, 31 +; GISEL-NEXT: s_add_i32 s0, s0, s2 +; GISEL-NEXT: s_add_i32 s1, s1, s3 +; GISEL-NEXT: s_xor_b32 s0, s0, s2 +; GISEL-NEXT: s_xor_b32 s1, s1, s3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GISEL-NEXT: s_sub_i32 s3, 0, s1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_lo_u32 v1, v0, s2 -; GISEL-NEXT: v_mul_hi_u32 v2, v0, s2 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0 -; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1 -; GISEL-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v0, s3 -; GISEL-NEXT: v_mul_lo_u32 v0, v0, s2 -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s3, v0 -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 -; GISEL-NEXT: v_add_i32_e64 v2, s[0:1], s2, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[0:1], s3, v0 -; GISEL-NEXT: v_subrev_i32_e64 v0, s[2:3], s2, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[0:1] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] -; GISEL-NEXT: v_xor_b32_e32 v0, s4, v0 -; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0 +; GISEL-NEXT: v_mul_lo_u32 v1, s3, v0 +; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0 +; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 +; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-NEXT: v_xor_b32_e32 v0, s2, v0 +; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 ; GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GISEL-NEXT: ; return to shader part epilog ; @@ -182,51 +170,39 @@ ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v2 -; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v5 -; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v5, v8 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v8 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 -; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 -; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 -; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 @@ -311,34 +287,27 @@ ; CHECK-LABEL: v_srem_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: s_movk_i32 s4, 0x1000 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v2, 0xfffff000 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_lshlrev_b32_e32 v4, 12, v3 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, s6 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 -; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 -; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v3 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 -; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v2 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -351,56 +320,43 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 -; GISEL-NEXT: s_add_i32 s10, 0x1000, 0 +; GISEL-NEXT: s_add_i32 s4, 0x1000, 0 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4 +; GISEL-NEXT: s_sub_i32 s5, 0, s4 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v5, s10 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, s10 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, s10 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, s10 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1 -; GISEL-NEXT: v_mul_lo_u32 v5, v5, s10 -; GISEL-NEXT: v_mul_lo_u32 v4, v4, s10 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v5 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v4 -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v6 -; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], s10, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; GISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], s10, v6 -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v7 -; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], s10, v7 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4 -; GISEL-NEXT: v_subrev_i32_e64 v1, s[10:11], s10, v7 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; GISEL-NEXT: v_mul_lo_u32 v6, s5, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, s5, v4 +; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, s4 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -413,61 +369,49 @@ ; CGP-NEXT: s_movk_i32 s4, 0x1000 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; CGP-NEXT: v_mov_b32_e32 v3, 0x1000 -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: s_mov_b32 s5, 0xfffff000 +; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s4 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v5, s4 -; CGP-NEXT: v_lshlrev_b32_e32 v9, 12, v6 -; CGP-NEXT: v_mul_hi_u32 v10, v6, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v7, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v8, v6 -; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7 -; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8 -; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v5, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v6, v6, v1 -; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s5, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 ; CGP-NEXT: v_lshlrev_b32_e32 v6, 12, v6 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v1, v6 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3 -; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v7, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v7, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v8, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v6 -; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v8, v3 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v9, v0, s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i32> %num, ret <2 x i32> %result @@ -477,34 +421,27 @@ ; CHECK-LABEL: v_srem_i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb +; CHECK-NEXT: v_mov_b32_e32 v2, 0xffed2705 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s6 +; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_lo_u32 v4, v3, s6 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, s6 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 -; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 -; CHECK-NEXT: v_mul_lo_u32 v3, v3, s6 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v4 -; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v2 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v2, v3, v2 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -517,56 +454,43 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0 -; GISEL-NEXT: s_add_i32 s10, 0x12d8fb, 0 +; GISEL-NEXT: s_add_i32 s4, 0x12d8fb, 0 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s10 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4 +; GISEL-NEXT: s_sub_i32 s5, 0, s4 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_mul_lo_u32 v6, v5, s10 -; GISEL-NEXT: v_mul_hi_u32 v7, v5, s10 -; GISEL-NEXT: v_mul_lo_u32 v8, v4, s10 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, s10 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v5 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v4 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v5, v6 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v4, v7 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v1 -; GISEL-NEXT: v_mul_lo_u32 v5, v5, s10 -; GISEL-NEXT: v_mul_lo_u32 v4, v4, s10 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v5 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v4 -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s10, v6 -; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], s10, v6 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; GISEL-NEXT: v_subrev_i32_e64 v0, s[6:7], s10, v6 -; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v7 -; GISEL-NEXT: v_add_i32_e64 v5, s[8:9], s10, v7 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4 -; GISEL-NEXT: v_subrev_i32_e64 v1, s[10:11], s10, v7 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; GISEL-NEXT: v_mul_lo_u32 v6, s5, v5 +; GISEL-NEXT: v_mul_lo_u32 v7, s5, v4 +; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, s4 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 @@ -576,64 +500,52 @@ ; CGP-LABEL: v_srem_v2i32_oddk_denom: ; CGP: ; %bb.0: ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CGP-NEXT: s_mov_b32 s8, 0x12d8fb +; CGP-NEXT: s_mov_b32 s4, 0x12d8fb ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0 ; CGP-NEXT: v_mov_b32_e32 v3, 0x12d8fb -; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v1 +; CGP-NEXT: s_mov_b32 s5, 0xffed2705 +; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705 +; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_cvt_f32_u32_e32 v5, s8 -; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 -; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, s4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5 +; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; CGP-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 -; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 -; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 +; CGP-NEXT: v_rcp_iflag_f32_e32 v7, v7 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6 -; CGP-NEXT: v_mul_lo_u32 v7, v5, s8 -; CGP-NEXT: v_mul_hi_u32 v8, v5, s8 -; CGP-NEXT: v_mul_lo_u32 v9, v6, v3 -; CGP-NEXT: v_mul_hi_u32 v10, v6, v3 -; CGP-NEXT: v_sub_i32_e32 v11, vcc, 0, v7 -; CGP-NEXT: v_sub_i32_e32 v12, vcc, 0, v9 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 -; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc -; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 -; CGP-NEXT: v_cndmask_b32_e64 v8, v9, v12, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v7, v7, v5 -; CGP-NEXT: v_mul_hi_u32 v8, v8, v6 -; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v5, v7 -; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v6, v8 -; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 -; CGP-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CGP-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[4:5] -; CGP-NEXT: v_mul_hi_u32 v5, v5, v0 -; CGP-NEXT: v_mul_hi_u32 v6, v6, v1 -; CGP-NEXT: v_mul_lo_u32 v5, v5, s8 -; CGP-NEXT: v_mul_lo_u32 v6, v6, v3 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v0, v5 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v1, v6 -; CGP-NEXT: v_cmp_le_u32_e32 vcc, s8, v7 -; CGP-NEXT: v_add_i32_e64 v9, s[4:5], v7, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v7, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v8, v3 -; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v6 -; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v8, v3 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; CGP-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v9, v0, s[4:5] -; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7 +; CGP-NEXT: v_mul_lo_u32 v8, s5, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v8 +; CGP-NEXT: v_mul_hi_u32 v4, v7, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v8 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4 +; CGP-NEXT: v_mul_hi_u32 v6, v0, v6 +; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v6, v6, s4 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v1, v3 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2 -; CGP-NEXT: v_xor_b32_e32 v1, v1, v4 +; CGP-NEXT: v_xor_b32_e32 v1, v1, v5 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] %result = srem <2 x i32> %num, ret <2 x i32> %result @@ -651,28 +563,22 @@ ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3 ; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_mul_lo_u32 v4, v3, v1 -; CHECK-NEXT: v_mul_hi_u32 v5, v3, v1 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 -; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_lo_u32 v4, v4, v3 +; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CHECK-NEXT: v_mul_hi_u32 v3, v0, v3 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 -; CHECK-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -701,51 +607,39 @@ ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_lo_u32 v8, v6, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v6, v2 -; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6 -; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v6, v8 -; GISEL-NEXT: v_sub_i32_e64 v6, s[6:7], v6, v8 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 -; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v0 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 +; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 -; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 -; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 @@ -844,28 +738,22 @@ ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 -; GISEL-NEXT: v_mul_lo_u32 v4, v3, v1 -; GISEL-NEXT: v_mul_hi_u32 v5, v3, v1 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v3 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v0 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v4, v1 -; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v4, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 ; GISEL-NEXT: s_setpc_b64 s[30:31] @@ -930,51 +818,39 @@ ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3 +; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 -; GISEL-NEXT: v_mul_f32_e32 v7, 0x4f800000, v7 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5 +; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v2 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v2 -; GISEL-NEXT: v_mul_lo_u32 v10, v7, v3 -; GISEL-NEXT: v_mul_hi_u32 v11, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, 0, v8 -; GISEL-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v8, v8, v12, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11 -; GISEL-NEXT: v_cndmask_b32_e64 v9, v10, v13, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v8, v8, v5 -; GISEL-NEXT: v_mul_hi_u32 v9, v9, v7 -; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v5, v8 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v8 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v7, v9 -; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v7, v9 -; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v10, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v8, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v0 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5 +; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8 +; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7 +; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9 +; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3 -; GISEL-NEXT: v_sub_i32_e32 v8, vcc, v0, v5 -; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v1, v7 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2 -; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v8, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v5 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v8, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3 -; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v9, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v7 -; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v9, v3 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll @@ -159,36 +159,30 @@ ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v3, v7 ; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v1, v7, vcc ; CHECK-NEXT: BB0_2: ; %Flow -; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz BB0_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 +; CHECK-NEXT: v_mov_b32_e32 v5, 0 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 -; CHECK-NEXT: v_mov_b32_e32 v5, 0 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v2 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc ; CHECK-NEXT: BB0_4: -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, v4 ; CHECK-NEXT: v_mov_b32_e32 v1, v5 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -365,28 +359,22 @@ ; CHECK-NEXT: s_cbranch_scc0 BB1_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s4 +; CHECK-NEXT: s_sub_i32 s0, 0, s4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v1, v0, s4 -; CHECK-NEXT: v_mul_hi_u32 v2, v0, s4 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 -; CHECK-NEXT: v_add_i32_e64 v2, s[0:1], v0, v1 -; CHECK-NEXT: v_sub_i32_e64 v0, s[0:1], v0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_mul_hi_u32 v0, v0, s2 +; CHECK-NEXT: v_mul_lo_u32 v1, s0, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, s4 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s2, v0 -; CHECK-NEXT: v_add_i32_e64 v2, s[0:1], s4, v1 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[0:1], s2, v0 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 -; CHECK-NEXT: v_subrev_i32_e64 v0, s[2:3], s4, v1 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: BB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 @@ -845,36 +833,30 @@ ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v11 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v5, v11, vcc ; CGP-NEXT: BB2_2: ; %Flow2 -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v0 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 -; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB2_4: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_or_b32_e32 v5, v3, v7 ; CGP-NEXT: v_mov_b32_e32 v4, 0 ; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] @@ -1026,36 +1008,30 @@ ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_subb_u32_e32 v5, vcc, v3, v9, vcc ; CGP-NEXT: BB2_6: ; %Flow -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB2_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 +; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v6 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc ; CGP-NEXT: BB2_8: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_mov_b32_e32 v2, v4 ; CGP-NEXT: v_mov_b32_e32 v3, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] @@ -2651,36 +2627,30 @@ ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v2, v7 ; CHECK-NEXT: v_subb_u32_e32 v3, vcc, v1, v7, vcc ; CHECK-NEXT: BB7_2: ; %Flow -; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz BB7_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 +; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 -; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4 -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v2, v4 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v4 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5] -; CHECK-NEXT: v_mov_b32_e32 v3, 0 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc ; CHECK-NEXT: BB7_4: -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, v2 ; CHECK-NEXT: v_mov_b32_e32 v1, v3 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -3139,36 +3109,30 @@ ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v1, v11 ; CGP-NEXT: v_subb_u32_e32 v1, vcc, v4, v11, vcc ; CGP-NEXT: BB8_2: ; %Flow2 -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB8_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 -; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v5, v0 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v10 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v0 -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v1, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 -; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v10 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB8_4: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_or_b32_e32 v5, v3, v9 ; CGP-NEXT: v_mov_b32_e32 v4, 0 ; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] @@ -3320,36 +3284,30 @@ ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v4, v9 ; CGP-NEXT: v_subb_u32_e32 v5, vcc, v3, v9, vcc ; CGP-NEXT: BB8_6: ; %Flow -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB8_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8 +; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v8 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v8 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v8 -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v8 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] -; CGP-NEXT: v_mov_b32_e32 v5, 0 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc ; CGP-NEXT: BB8_8: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_mov_b32_e32 v2, v4 ; CGP-NEXT: v_mov_b32_e32 v3, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] @@ -3365,29 +3323,23 @@ ; GISEL-NEXT: s_mov_b32 s4, 0xffffff ; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll @@ -9,28 +9,24 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_i32: @@ -75,28 +71,24 @@ ; GISEL-LABEL: s_udiv_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GISEL-NEXT: s_sub_i32 s2, 0, s1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GISEL-NEXT: v_mul_lo_u32 v1, s2, v0 +; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0 ; GISEL-NEXT: v_mul_lo_u32 v1, v0, s1 -; GISEL-NEXT: v_mul_hi_u32 v2, v0, s1 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0 -; GISEL-NEXT: v_add_i32_e64 v2, s[2:3], v0, v1 -; GISEL-NEXT: v_sub_i32_e64 v0, s[2:3], v0, v1 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v1 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v0, s0 -; GISEL-NEXT: v_mul_lo_u32 v1, v0, s1 +; GISEL-NEXT: v_subrev_i32_e64 v2, s[2:3], s1, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 -; GISEL-NEXT: v_sub_i32_e32 v4, vcc, s0, v1 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, s0, v1 -; GISEL-NEXT: v_cmp_le_u32_e64 s[0:1], s1, v4 -; GISEL-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GISEL-NEXT: ; return to shader part epilog ; @@ -142,51 +134,43 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 -; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i32: @@ -255,89 +239,117 @@ ; CHECK-LABEL: v_udiv_i32_pow2k_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: v_mov_b32_e32 v1, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v1, 0xfffff000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, s4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 -; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2 -; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v1 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = udiv i32 %num, 4096 ret i32 %result } define <2 x i32> @v_udiv_v2i32_pow2k_denom(<2 x i32> %num) { -; CHECK-LABEL: v_udiv_v2i32_pow2k_denom: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4 -; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_lshlrev_b32_e32 v5, 12, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v3, s4 -; CHECK-NEXT: v_lshlrev_b32_e32 v7, 12, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5 -; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4 -; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5 -; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5 -; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6 -; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1 -; CHECK-NEXT: v_lshlrev_b32_e32 v5, 12, v3 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v3 -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3 -; CHECK-NEXT: v_lshlrev_b32_e32 v8, 12, v4 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v4 -; CHECK-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 -; CHECK-NEXT: v_sub_i32_e32 v11, vcc, v0, v5 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v8 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v2 -; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v6, s[6:7] -; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v1, v4, v9, s[6:7] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5] -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_udiv_v2i32_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s8, 0x1000 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s8 +; GISEL-NEXT: s_sub_i32 s4, 0, s8 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v4, s4, v3 +; GISEL-NEXT: v_mul_lo_u32 v5, s4, v2 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v2, v5 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2 +; GISEL-NEXT: v_lshlrev_b32_e32 v4, 12, v3 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; GISEL-NEXT: v_lshlrev_b32_e32 v6, 12, v2 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_subrev_i32_e64 v4, s[4:5], s8, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v5, s[6:7], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v2 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i32_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: v_mov_b32_e32 v2, 0x1000 +; CGP-NEXT: s_mov_b32 s5, 0xfffff000 +; CGP-NEXT: v_mov_b32_e32 v3, 0xfffff000 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, s4 +; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 +; CGP-NEXT: v_mul_lo_u32 v6, s5, v4 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v4, v6 +; CGP-NEXT: v_mul_hi_u32 v3, v5, v3 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v1, v3 +; CGP-NEXT: v_lshlrev_b32_e32 v5, 12, v4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; CGP-NEXT: v_lshlrev_b32_e32 v7, 12, v3 +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc +; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v8, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v4 +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v6, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] %result = udiv <2 x i32> %num, ret <2 x i32> %result } @@ -347,87 +359,115 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 -; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 -; CHECK-NEXT: v_mul_hi_u32 v3, v1, s6 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 -; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 1, v1 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 -; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v5 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_subrev_i32_e64 v2, s[4:5], s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = udiv i32 %num, 1235195 ret i32 %result } define <2 x i32> @v_udiv_v2i32_oddk_denom(<2 x i32> %num) { -; CHECK-LABEL: v_udiv_v2i32_oddk_denom: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb -; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8 -; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v3, s8 -; CHECK-NEXT: v_mul_hi_u32 v6, v3, s8 -; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5 -; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4 -; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5 -; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5 -; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6 -; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1 -; CHECK-NEXT: v_mul_lo_u32 v5, v3, s8 -; CHECK-NEXT: v_add_i32_e32 v6, vcc, 1, v3 -; CHECK-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3 -; CHECK-NEXT: v_mul_lo_u32 v8, v4, v2 -; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v4 -; CHECK-NEXT: v_subrev_i32_e32 v10, vcc, 1, v4 -; CHECK-NEXT: v_sub_i32_e32 v11, vcc, v0, v5 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v8 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8 -; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v11 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v2 -; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v6, s[6:7] -; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v1, v4, v9, s[6:7] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v1, v10, v1, s[4:5] -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_udiv_v2i32_oddk_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s8 +; GISEL-NEXT: s_sub_i32 s4, 0, s8 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v4, s4, v3 +; GISEL-NEXT: v_mul_lo_u32 v5, s4, v2 +; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v2, v5 +; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5 +; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2 +; GISEL-NEXT: v_mul_lo_u32 v4, v3, s8 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; GISEL-NEXT: v_mul_lo_u32 v6, v2, s8 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_subrev_i32_e64 v4, s[4:5], s8, v0 +; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[4:5] +; GISEL-NEXT: v_subrev_i32_e64 v5, s[6:7], s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v2 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_udiv_v2i32_oddk_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb +; CGP-NEXT: s_mov_b32 s5, 0xffed2705 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s5, v3 +; CGP-NEXT: v_mul_lo_u32 v6, s5, v4 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v4, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_mul_hi_u32 v3, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 +; CGP-NEXT: v_mul_lo_u32 v5, v3, s4 +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v3 +; CGP-NEXT: v_mul_lo_u32 v7, v4, v2 +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CGP-NEXT: v_subrev_i32_e64 v5, s[4:5], s4, v0 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2 +; CGP-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5] +; CGP-NEXT: v_sub_i32_e64 v6, s[6:7], v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[4:5] +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v6, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] %result = udiv <2 x i32> %num, ret <2 x i32> %result } @@ -438,28 +478,24 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v1 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %shl.y = shl i32 4096, %y %r = udiv i32 %x, %shl.y @@ -474,51 +510,43 @@ ; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 ; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 -; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i32_pow2_shl_denom: @@ -595,28 +623,24 @@ ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_i32_24bit: @@ -669,51 +693,43 @@ ; GISEL-NEXT: v_and_b32_e32 v2, s4, v2 ; GISEL-NEXT: v_and_b32_e32 v3, s4, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 -; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 -; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3 -; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5 -; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 -; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 -; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 -; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7] -; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v8 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc +; GISEL-NEXT: v_sub_i32_e64 v6, s[4:5], v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v9, s[4:5] +; GISEL-NEXT: v_sub_i32_e64 v7, s[6:7], v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v4 +; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5] +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_udiv_v2i32_24bit: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll @@ -151,28 +151,24 @@ ; CHECK-NEXT: s_cbranch_execz BB0_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v1 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v4, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc ; CHECK-NEXT: v_mov_b32_e32 v5, 0 ; CHECK-NEXT: BB0_4: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -335,28 +331,24 @@ ; CHECK-NEXT: s_cbranch_scc0 BB1_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: s_sub_i32 s1, 0, s2 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 -; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 -; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0 -; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 +; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0 -; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, 1, v0 -; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, s0, v1 -; CHECK-NEXT: v_cmp_le_u32_e64 s[0:1], s2, v4 -; CHECK-NEXT: s_and_b64 s[0:1], s[0:1], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: BB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 @@ -781,28 +773,24 @@ ; CGP-NEXT: s_cbranch_execz BB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v1, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v0 -; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v0 -; CGP-NEXT: v_sub_i32_e32 v10, vcc, v8, v1 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v1 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v9, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB2_4: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -949,28 +937,24 @@ ; CGP-NEXT: s_cbranch_execz BB2_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 -; CGP-NEXT: v_subrev_i32_e32 v7, vcc, 1, v3 -; CGP-NEXT: v_sub_i32_e32 v8, vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v2, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc ; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: BB2_8: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -2453,28 +2437,24 @@ ; CHECK-NEXT: s_cbranch_execz BB7_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 -; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 -; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v1 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 -; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v1, v3, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v0, vcc +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc ; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: BB7_4: ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7] @@ -2902,28 +2882,24 @@ ; CGP-NEXT: s_cbranch_execz BB8_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v5, v0 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 -; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v10 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v0 -; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v0 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v5, v1 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v10 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB8_4: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -3070,28 +3046,24 @@ ; CGP-NEXT: s_cbranch_execz BB8_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 ; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v3 -; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v8 -; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; CGP-NEXT: v_cndmask_b32_e64 v2, v3, v5, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v2, vcc +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v4, v3, v4, vcc ; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: BB8_8: ; CGP-NEXT: s_or_b64 exec, exec, s[6:7] @@ -3111,28 +3083,24 @@ ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 -; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 -; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc ; GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -9,28 +9,22 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_i32: @@ -73,28 +67,22 @@ ; GISEL-LABEL: s_urem_i32: ; GISEL: ; %bb.0: ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1 +; GISEL-NEXT: s_sub_i32 s2, 0, s1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0 -; GISEL-NEXT: v_mul_lo_u32 v1, v0, s1 -; GISEL-NEXT: v_mul_hi_u32 v2, v0, s1 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; GISEL-NEXT: v_mul_hi_u32 v1, v1, v0 -; GISEL-NEXT: v_add_i32_e64 v2, s[2:3], v0, v1 -; GISEL-NEXT: v_sub_i32_e64 v0, s[2:3], v0, v1 -; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GISEL-NEXT: v_mul_hi_u32 v0, v0, s0 +; GISEL-NEXT: v_mul_lo_u32 v1, s2, v0 +; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1 +; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0 ; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1 -; GISEL-NEXT: v_sub_i32_e32 v1, vcc, s0, v0 -; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v1 -; GISEL-NEXT: v_add_i32_e64 v2, s[2:3], s1, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], s0, v0 -; GISEL-NEXT: v_subrev_i32_e64 v0, s[2:3], s1, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 +; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; GISEL-NEXT: v_readfirstlane_b32 s0, v0 ; GISEL-NEXT: ; return to shader part epilog ; @@ -138,51 +126,40 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: s_mov_b32 s4, 0x4f7ffffe +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, s4, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, s4, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 -; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 -; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_v2i32: @@ -248,88 +225,101 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: v_mov_b32_e32 v1, 0x1000 +; CHECK-NEXT: v_mov_b32_e32 v1, 0xfffff000 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, s4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 -; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v1, 12, v1 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i32 %num, 4096 ret i32 %result } define <2 x i32> @v_urem_v2i32_pow2k_denom(<2 x i32> %num) { -; CHECK-LABEL: v_urem_v2i32_pow2k_denom: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_movk_i32 s4, 0x1000 -; CHECK-NEXT: v_mov_b32_e32 v2, 0x1000 -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s4 -; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_lshlrev_b32_e32 v5, 12, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v3, s4 -; CHECK-NEXT: v_lshlrev_b32_e32 v7, 12, v4 -; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5 -; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4 -; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5 -; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5 -; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6 -; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1 -; CHECK-NEXT: v_lshlrev_b32_e32 v3, 12, v3 -; CHECK-NEXT: v_lshlrev_b32_e32 v4, 12, v4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v4 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2 -; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v5, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v5, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v2 -; CHECK-NEXT: v_add_i32_e64 v3, s[8:9], v6, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4 -; CHECK-NEXT: v_sub_i32_e64 v1, s[10:11], v6, v2 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v7, v0, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9] -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_urem_v2i32_pow2k_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4 +; GISEL-NEXT: s_sub_i32 s5, 0, s4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2 +; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2 +; GISEL-NEXT: v_lshlrev_b32_e32 v3, 12, v3 +; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i32_pow2k_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: v_mov_b32_e32 v2, 0x1000 +; CGP-NEXT: s_mov_b32 s5, 0x4f7ffffe +; CGP-NEXT: s_mov_b32 s6, 0xfffff000 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v3, s5, v3 +; CGP-NEXT: v_mul_f32_e32 v4, s5, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4 +; CGP-NEXT: v_mul_lo_u32 v5, s6, v3 +; CGP-NEXT: v_mul_lo_u32 v6, s6, v4 +; CGP-NEXT: v_mul_hi_u32 v5, v3, v5 +; CGP-NEXT: v_mul_hi_u32 v6, v4, v6 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; CGP-NEXT: v_mul_hi_u32 v3, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v1, v4 +; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3 +; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i32> %num, ret <2 x i32> %result } @@ -338,89 +328,94 @@ ; CHECK-LABEL: v_urem_i32_oddk_denom: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb -; CHECK-NEXT: v_mov_b32_e32 v1, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s6 +; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb +; CHECK-NEXT: v_mov_b32_e32 v1, 0xffed2705 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v3, v2, s6 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, s6 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 -; CHECK-NEXT: v_mul_lo_u32 v2, v2, s6 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v3 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, s4 +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s4, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %result = urem i32 %num, 1235195 ret i32 %result } define <2 x i32> @v_urem_v2i32_oddk_denom(<2 x i32> %num) { -; CHECK-LABEL: v_urem_v2i32_oddk_denom: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb -; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb -; CHECK-NEXT: v_cvt_f32_u32_e32 v3, s8 -; CHECK-NEXT: v_cvt_f32_u32_e32 v4, v2 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CHECK-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 -; CHECK-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4 -; CHECK-NEXT: v_mul_lo_u32 v5, v3, s8 -; CHECK-NEXT: v_mul_hi_u32 v6, v3, s8 -; CHECK-NEXT: v_mul_lo_u32 v7, v4, v2 -; CHECK-NEXT: v_mul_hi_u32 v8, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v5 -; CHECK-NEXT: v_sub_i32_e32 v10, vcc, 0, v7 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v5, v5, v9, vcc -; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8 -; CHECK-NEXT: v_cndmask_b32_e64 v6, v7, v10, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v5, v5, v3 -; CHECK-NEXT: v_mul_hi_u32 v6, v6, v4 -; CHECK-NEXT: v_add_i32_e64 v7, s[6:7], v3, v5 -; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v5 -; CHECK-NEXT: v_add_i32_e64 v5, s[6:7], v4, v6 -; CHECK-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[4:5] -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 -; CHECK-NEXT: v_mul_hi_u32 v4, v4, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, v3, s8 -; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v3 -; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v4 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s8, v5 -; CHECK-NEXT: v_add_i32_e64 v7, s[4:5], v5, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v5, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v2 -; CHECK-NEXT: v_add_i32_e64 v3, s[8:9], v6, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v4 -; CHECK-NEXT: v_sub_i32_e64 v1, s[10:11], v6, v2 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc -; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v7, v0, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9] -; CHECK-NEXT: s_setpc_b64 s[30:31] +; GISEL-LABEL: v_urem_v2i32_oddk_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, s4 +; GISEL-NEXT: s_sub_i32 s5, 0, s4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, s5, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v3, v0, v2 +; GISEL-NEXT: v_mul_hi_u32 v2, v1, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, s4 +; GISEL-NEXT: v_mul_lo_u32 v2, v2, s4 +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v2 +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GISEL-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0 +; GISEL-NEXT: v_subrev_i32_e32 v3, vcc, s4, v1 +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: v_urem_v2i32_oddk_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0x12d8fb +; CGP-NEXT: v_mov_b32_e32 v2, 0x12d8fb +; CGP-NEXT: s_mov_b32 s5, 0xffed2705 +; CGP-NEXT: v_cvt_f32_u32_e32 v3, s4 +; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 +; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CGP-NEXT: v_mul_lo_u32 v4, s5, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v4, v0, v3 +; CGP-NEXT: v_mul_hi_u32 v3, v1, v3 +; CGP-NEXT: v_mul_lo_u32 v4, v4, s4 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v2 +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 +; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v2 +; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2 +; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] %result = urem <2 x i32> %num, ret <2 x i32> %result } @@ -431,28 +426,22 @@ ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 -; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1 -; CHECK-NEXT: v_mul_hi_u32 v4, v2, v1 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2 +; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3 +; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; CHECK-NEXT: s_setpc_b64 s[30:31] %shl.y = shl i32 4096, %y %r = urem i32 %x, %shl.y @@ -464,54 +453,43 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: s_mov_b32 s5, 0x4f7ffffe ; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 ; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, s5, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 -; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 -; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_v2i32_pow2_shl_denom: @@ -584,28 +562,22 @@ ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_i32_24bit: @@ -651,56 +623,45 @@ ; GISEL: ; %bb.0: ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: s_mov_b32 s5, 0x4f7ffffe ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 ; GISEL-NEXT: v_and_b32_e32 v2, s4, v2 ; GISEL-NEXT: v_and_b32_e32 v3, s4, v3 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 -; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v3 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 -; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 -; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_f32_e32 v4, s5, v4 +; GISEL-NEXT: v_mul_f32_e32 v6, s5, v6 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 -; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 -; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 -; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 -; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 -; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 -; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc -; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 -; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 -; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 -; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 -; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 -; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 -; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 -; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] -; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 -; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v4 +; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6 +; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7 +; GISEL-NEXT: v_mul_hi_u32 v4, v0, v4 +; GISEL-NEXT: v_mul_hi_u32 v5, v1, v5 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3 -; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 -; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 -; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 -; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 -; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc -; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] -; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc ; GISEL-NEXT: s_setpc_b64 s[30:31] ; ; CGP-LABEL: v_urem_v2i32_24bit: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -145,36 +145,30 @@ ; CHECK-NEXT: v_cndmask_b32_e32 v4, v5, v3, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v1, vcc ; CHECK-NEXT: BB0_2: ; %Flow -; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz BB0_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v2 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v2 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v3, v1, v2 -; CHECK-NEXT: v_mul_hi_u32 v4, v1, v2 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v3, v3, v1 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v1, v3 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2 -; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v2 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v2 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v4, v4, v0, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc ; CHECK-NEXT: v_mov_b32_e32 v5, 0 ; CHECK-NEXT: BB0_4: -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, v4 ; CHECK-NEXT: v_mov_b32_e32 v1, v5 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -333,28 +327,22 @@ ; CHECK-NEXT: s_cbranch_scc0 BB1_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2 +; CHECK-NEXT: s_sub_i32 s1, 0, s2 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2 -; CHECK-NEXT: v_mul_hi_u32 v2, v0, s2 -; CHECK-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 -; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], v0, v1 -; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 -; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; CHECK-NEXT: v_mul_hi_u32 v0, v0, s0 +; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2 -; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v0 -; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1 -; CHECK-NEXT: v_add_i32_e64 v2, s[4:5], s2, v1 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[0:1], s0, v0 -; CHECK-NEXT: v_subrev_i32_e64 v0, s[2:3], s2, v1 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[0:1] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s0, v0 +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CHECK-NEXT: BB1_4: ; CHECK-NEXT: v_readfirstlane_b32 s0, v0 ; CHECK-NEXT: s_mov_b32 s1, s0 @@ -771,36 +759,30 @@ ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v5, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v10, v11, vcc ; CGP-NEXT: BB2_2: ; %Flow2 -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB2_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v8 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v8, v0 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v0 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v1, v4 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v0 -; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB2_4: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_or_b32_e32 v5, v3, v7 ; CGP-NEXT: v_mov_b32_e32 v4, 0 ; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] @@ -938,36 +920,30 @@ ; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc ; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v3, vcc ; CGP-NEXT: BB2_6: ; %Flow -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB2_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v6 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v6 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v7, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v6 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v6 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v6 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6 +; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc ; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: BB2_8: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_mov_b32_e32 v2, v4 ; CGP-NEXT: v_mov_b32_e32 v3, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] @@ -2421,36 +2397,30 @@ ; CHECK-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc ; CHECK-NEXT: v_cndmask_b32_e32 v3, v6, v1, vcc ; CHECK-NEXT: BB7_2: ; %Flow -; CHECK-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CHECK-NEXT: s_xor_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5] ; CHECK-NEXT: s_cbranch_execz BB7_4 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v4 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v4 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 -; CHECK-NEXT: v_mul_lo_u32 v2, v1, v4 -; CHECK-NEXT: v_mul_hi_u32 v3, v1, v4 -; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 -; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 -; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc -; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 -; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 -; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2 +; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v4 -; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 -; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4 -; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v2, v4 -; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 -; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v4 -; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc -; CHECK-NEXT: v_cndmask_b32_e64 v2, v3, v0, s[4:5] +; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc ; CHECK-NEXT: v_mov_b32_e32 v3, 0 ; CHECK-NEXT: BB7_4: -; CHECK-NEXT: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] ; CHECK-NEXT: v_mov_b32_e32 v0, v2 ; CHECK-NEXT: v_mov_b32_e32 v1, v3 ; CHECK-NEXT: s_setpc_b64 s[30:31] @@ -2867,36 +2837,30 @@ ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v7, vcc ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc ; CGP-NEXT: BB8_2: ; %Flow2 -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB8_4 ; CGP-NEXT: ; %bb.3: ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v10 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v10 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0 -; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0 -; CGP-NEXT: v_mul_lo_u32 v1, v0, v10 -; CGP-NEXT: v_mul_hi_u32 v4, v0, v10 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v1 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v1, v1, v0 -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v0, v1 -; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v1 -; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc -; CGP-NEXT: v_mul_hi_u32 v0, v0, v5 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v0 +; CGP-NEXT: v_mul_hi_u32 v1, v0, v1 +; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; CGP-NEXT: v_mul_hi_u32 v0, v5, v0 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v10 -; CGP-NEXT: v_sub_i32_e32 v1, vcc, v5, v0 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 -; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v1, v10 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v0 -; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v10 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc -; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v0, vcc, v5, v0 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v10 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 +; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc ; CGP-NEXT: v_mov_b32_e32 v1, 0 ; CGP-NEXT: BB8_4: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_or_b32_e32 v5, v3, v9 ; CGP-NEXT: v_mov_b32_e32 v4, 0 ; CGP-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] @@ -3034,36 +2998,30 @@ ; CGP-NEXT: v_cndmask_b32_e32 v4, v5, v7, vcc ; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v3, vcc ; CGP-NEXT: BB8_6: ; %Flow -; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[6:7] -; CGP-NEXT: s_xor_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7] +; CGP-NEXT: s_xor_b64 exec, exec, s[4:5] ; CGP-NEXT: s_cbranch_execz BB8_8 ; CGP-NEXT: ; %bb.7: ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v8 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v8 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, v3 -; CGP-NEXT: v_mul_f32_e32 v3, 0x4f800000, v3 +; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3 -; CGP-NEXT: v_mul_lo_u32 v4, v3, v8 -; CGP-NEXT: v_mul_hi_u32 v5, v3, v8 -; CGP-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 -; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc -; CGP-NEXT: v_mul_hi_u32 v4, v4, v3 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v3, v4 -; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4 -; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; CGP-NEXT: v_mul_hi_u32 v3, v3, v2 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v3 +; CGP-NEXT: v_mul_hi_u32 v4, v3, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_mul_hi_u32 v3, v2, v3 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v8 -; CGP-NEXT: v_sub_i32_e32 v4, vcc, v2, v3 -; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v8 -; CGP-NEXT: v_add_i32_e64 v5, s[4:5], v4, v8 -; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3 -; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v4, v8 -; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] -; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc -; CGP-NEXT: v_cndmask_b32_e64 v4, v5, v2, s[4:5] +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v8 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v8 +; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v3, vcc ; CGP-NEXT: v_mov_b32_e32 v5, 0 ; CGP-NEXT: BB8_8: -; CGP-NEXT: s_or_b64 exec, exec, s[8:9] +; CGP-NEXT: s_or_b64 exec, exec, s[4:5] ; CGP-NEXT: v_mov_b32_e32 v2, v4 ; CGP-NEXT: v_mov_b32_e32 v3, v5 ; CGP-NEXT: s_setpc_b64 s[30:31] @@ -3080,28 +3038,22 @@ ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v2 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 -; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 -; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 -; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc -; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 -; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 -; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2 +; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3 +; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3 +; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 -; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 -; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 -; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 -; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 -; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 -; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] -; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc -; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 +; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GISEL-NEXT: v_mov_b32_e32 v1, 0 ; GISEL-NEXT: s_setpc_b64 s[30:31] ;