Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGISel.td +++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td @@ -143,6 +143,7 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -97,6 +97,11 @@ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, AMDGPUFunctionArgInfo::PreloadedValue ArgType) const; + bool legalizeUDIV_UREM(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeUDIV_UREM32(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; + bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeFDIV16(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -295,7 +295,7 @@ // FIXME: Not really legal. Placeholder for custom lowering. 
getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM}) - .legalFor({S32, S64}) + .customFor({S32, S64}) .clampScalar(0, S32, S64) .widenScalarToNextPow2(0, 32) .scalarize(0); @@ -1272,6 +1272,9 @@ return legalizeFMad(MI, MRI, B); case TargetOpcode::G_FDIV: return legalizeFDIV(MI, MRI, B); + case TargetOpcode::G_UDIV: + case TargetOpcode::G_UREM: + return legalizeUDIV_UREM(MI, MRI, B); case TargetOpcode::G_ATOMIC_CMPXCHG: return legalizeAtomicCmpXChg(MI, MRI, B); case TargetOpcode::G_FLOG: @@ -2237,6 +2240,122 @@ return false; } +static Register buildDivRCP(MachineIRBuilder &B, Register Src) { + const LLT S32 = LLT::scalar(32); + + auto Cvt0 = B.buildUITOFP(S32, Src); + auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Cvt0}); + auto FPUIntMaxPlus1 = B.buildFConstant(S32, BitsToFloat(0x4f800000)); + auto Mul = B.buildFMul(S32, RcpIFlag, FPUIntMaxPlus1); + return B.buildFPTOUI(S32, Mul).getReg(0); +} + +bool AMDGPULegalizerInfo::legalizeUDIV_UREM32(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + B.setInstr(MI); + bool IsRem = MI.getOpcode() == AMDGPU::G_UREM; + + const LLT S1 = LLT::scalar(1); + const LLT S32 = LLT::scalar(32); + + Register DstReg = MI.getOperand(0).getReg(); + Register Num = MI.getOperand(1).getReg(); + Register Den = MI.getOperand(2).getReg(); + + // RCP = URECIP(Den) = 2^32 / Den + e + // e is rounding error. + auto RCP = buildDivRCP(B, Den); + + // RCP_LO = mul(RCP, Den) + auto RCP_LO = B.buildMul(S32, RCP, Den); + + // RCP_HI = mulhu(RCP, Den) + auto RCP_HI = B.buildUMulH(S32, RCP, Den); + + // NEG_RCP_LO = -RCP_LO + auto Zero = B.buildConstant(S32, 0); + auto NEG_RCP_LO = B.buildSub(S32, Zero, RCP_LO); + + // ABS_RCP_LO = (RCP_HI == 0 ? 
NEG_RCP_LO : RCP_LO) + auto CmpRcpHiZero = B.buildICmp(CmpInst::ICMP_EQ, S1, RCP_HI, Zero); + auto ABS_RCP_LO = B.buildSelect(S32, CmpRcpHiZero, NEG_RCP_LO, RCP_LO); + + // Calculate the rounding error from the URECIP instruction + // E = mulhu(ABS_RCP_LO, RCP) + auto E = B.buildUMulH(S32, ABS_RCP_LO, RCP); + + // RCP_A_E = RCP + E + auto RCP_A_E = B.buildAdd(S32, RCP, E); + + // RCP_S_E = RCP - E + auto RCP_S_E = B.buildSub(S32, RCP, E); + + // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_S_E) + auto Tmp0 = B.buildSelect(S32, CmpRcpHiZero, RCP_A_E, RCP_S_E); + + // Quotient = mulhu(Tmp0, Num) + auto Quotient = B.buildUMulH(S32, Tmp0, Num); + + // Num_S_Remainder = Quotient * Den + auto Num_S_Remainder = B.buildMul(S32, Quotient, Den); + + // Remainder = Num - Num_S_Remainder + auto Remainder = B.buildSub(S32, Num, Num_S_Remainder); + + // Remainder_GE_Den = Remainder >= Den + auto Remainder_GE_Den = B.buildICmp(CmpInst::ICMP_UGE, S1, Remainder, Den); + + // Remainder_GE_Zero = Num >= Num_S_Remainder + auto Remainder_GE_Zero = B.buildICmp(CmpInst::ICMP_UGE, S1, + Num, Num_S_Remainder); + + // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero + auto Tmp1 = B.buildAnd(S1, Remainder_GE_Den, Remainder_GE_Zero); + + // Calculate Division result: + + // Quotient_A_One = Quotient + 1 + auto One = B.buildConstant(S32, 1); + auto Quotient_A_One = B.buildAdd(S32, Quotient, One); + + // Quotient_S_One = Quotient - 1 + auto Quotient_S_One = B.buildSub(S32, Quotient, One); + + // Div = (Tmp1 == 0 ? Quotient_A_One : Quotient) + auto Div = B.buildSelect(S32, Tmp1, Quotient, Quotient_A_One); + + // Div = (Remainder_GE_Zero ? Div : Quotient_S_One) + if (IsRem) { + Div = B.buildSelect(S32, Remainder_GE_Zero, Div, Quotient_S_One); + + // Calculate Rem result: + auto Remainder_S_Den = B.buildSub(S32, Remainder, Den); + + // Remainder_A_Den = Remainder + Den + auto Remainder_A_Den = B.buildAdd(S32, Remainder, Den); + + // Rem = (Tmp1 ? 
Remainder_S_Den : Remainder) + auto Rem = B.buildSelect(S32, Tmp1, Remainder_S_Den, Remainder); + + // Rem = (Remainder_GE_Zero ? Rem : Remainder_A_Den) + B.buildSelect(DstReg, Remainder_GE_Zero, Rem, Remainder_A_Den); + } else { + B.buildSelect(DstReg, Remainder_GE_Zero, Div, Quotient_S_One); + } + + MI.eraseFromParent(); + return true; +} + +bool AMDGPULegalizerInfo::legalizeUDIV_UREM(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { + if (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32)) + return legalizeUDIV_UREM32(MI, MRI, B); + return false; +} + bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3114,6 +3114,7 @@ case AMDGPU::G_AMDGPU_FFBH_U32: case AMDGPU::G_AMDGPU_FMIN_LEGACY: case AMDGPU::G_AMDGPU_FMAX_LEGACY: + case AMDGPU::G_AMDGPU_RCP_IFLAG: return getDefaultMappingVOP(MI); case AMDGPU::G_UMULH: case AMDGPU::G_SMULH: { Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -2144,6 +2144,12 @@ let hasSideEffects = 0; } +def G_AMDGPU_RCP_IFLAG : AMDGPUGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$src); + let hasSideEffects = 0; +} + class BufferLoadGenericInstruction : AMDGPUGenericInstruction { let OutOperandList = (outs type0:$dst); let InOperandList = (ins type1:$rsrc, type2:$vindex, type2:$voffset, Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir +++ 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sdiv.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_sdiv_s32 @@ -144,33 +144,36 @@ ; GFX6-LABEL: name: test_sdiv_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) + ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: 
test_sdiv_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) + ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_sdiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) + ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s16) + ; GFX9: $vgpr0 = COPY 
[[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -189,79 +192,39 @@ ; GFX6-LABEL: name: test_sdiv_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 - ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 - ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SDIV1]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV 
[[UV]], [[UV2]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; GFX6: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: test_sdiv_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 - ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 - ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SDIV1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST 
[[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; GFX8: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_sdiv_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 - ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SDIV1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = 
G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[SDIV:%[0-9]+]]:_(s16) = G_SDIV [[UV]], [[UV2]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; GFX9: [[SDIV1:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SDIV1]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SDIV]](s16), [[TRUNC]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SDIV %0, %1 @@ -277,33 +240,36 @@ ; GFX6-LABEL: name: test_sdiv_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) + ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_sdiv_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: 
[[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) + ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_sdiv_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) + ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SDIV]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -322,33 +288,36 @@ ; GFX6-LABEL: name: test_sdiv_s17 ; 
GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 - ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) + ; GFX6: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_sdiv_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 - ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) + ; GFX8: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) ; 
GFX9-LABEL: name: test_sdiv_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 - ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SDIV]](s32) - ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) + ; GFX9: [[SDIV:%[0-9]+]]:_(s32) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SDIV]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -367,33 +336,36 @@ ; GFX6-LABEL: name: test_sdiv_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 - ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 - ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SDIV]](s64) - ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) + ; GFX6: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX6: 
[[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX8-LABEL: name: test_sdiv_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 - ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 - ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SDIV]](s64) - ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) + ; GFX8: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_sdiv_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 - ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 - ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SDIV]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX9: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT 
[[TRUNC1]](s33) + ; GFX9: [[SDIV:%[0-9]+]]:_(s64) = G_SDIV [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SDIV]](s64) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-srem.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_srem_s32 @@ -144,39 +144,36 @@ ; GFX6-LABEL: name: test_srem_s16 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - 
; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX6: $vgpr0 = COPY [[AND]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) + ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16) + ; GFX6: $vgpr0 = COPY [[ZEXT]](s32) ; GFX8-LABEL: name: test_srem_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) + ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16) + ; GFX8: $vgpr0 = COPY [[ZEXT]](s32) ; GFX9-LABEL: name: test_srem_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX9: 
[[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16) + ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SREM]](s32) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC2]](s16) + ; GFX9: $vgpr0 = COPY [[ZEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -195,79 +192,39 @@ ; GFX6-LABEL: name: test_srem_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX6: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX6: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX6: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 - ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX6: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 
16 - ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SREM1]](s32) - ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]] + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; GFX6: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: test_srem_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX8: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX8: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX8: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = 
G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 - ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX8: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 - ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SREM1]](s32) - ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] - ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]] + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; GFX8: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: test_srem_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) - ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; GFX9: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) - ; GFX9: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; GFX9: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) - ; 
GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 16 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 16 - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 16 - ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 16 - ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG2]], [[SEXT_INREG3]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SREM1]](s32) - ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) - ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[SREM:%[0-9]+]]:_(s16) = G_SREM [[UV]], [[UV2]] + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[UV1]](s16) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[UV3]](s16) + ; GFX9: [[SREM1:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SREM1]](s32) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SREM]](s16), [[TRUNC]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SREM %0, %1 @@ -283,33 +240,36 @@ ; GFX6-LABEL: name: test_srem_s7 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; 
GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) + ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_srem_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) + ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_srem_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 7 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY 
[[COPY1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 7 - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s7) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s7) + ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[SREM]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -328,33 +288,36 @@ ; GFX6-LABEL: name: test_srem_s17 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 - ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 - ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX6: $vgpr0 = COPY [[COPY4]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) + ; GFX6: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX8-LABEL: name: test_srem_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) 
- ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 - ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 - ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX8: $vgpr0 = COPY [[COPY4]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) + ; GFX8: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-LABEL: name: test_srem_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 17 - ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY3]], 17 - ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SREM]](s32) - ; GFX9: $vgpr0 = COPY [[COPY4]](s32) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s17) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s17) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s17) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s17) + ; GFX9: [[SREM:%[0-9]+]]:_(s32) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s17) = G_TRUNC [[SREM]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s17) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s17) = G_TRUNC %0 @@ -373,33 +336,36 @@ ; GFX6-LABEL: name: test_srem_s33 ; GFX6: 
[[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX6: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 - ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX6: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 - ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SREM]](s64) - ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX6: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) + ; GFX6: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) + ; GFX6: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX8-LABEL: name: test_srem_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX8: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 - ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX8: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 - ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SREM]](s64) - ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX8: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) + ; GFX8: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) + ; GFX8: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX8: 
$vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_srem_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX9: [[SEXT_INREG:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY2]], 33 - ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX9: [[SEXT_INREG1:%[0-9]+]]:_(s64) = G_SEXT_INREG [[COPY3]], 33 - ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT_INREG]], [[SEXT_INREG1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[SREM]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX9: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s33) + ; GFX9: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC1]](s33) + ; GFX9: [[SREM:%[0-9]+]]:_(s64) = G_SREM [[SEXT]], [[SEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[SREM]](s64) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-udiv.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: 
llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_udiv_s32 @@ -12,18 +12,93 @@ ; GFX6-LABEL: name: test_udiv_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY [[UDIV]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: $vgpr0 = COPY [[SELECT3]](s32) ; GFX8-LABEL: name: test_udiv_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UDIV]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] + ; GFX8: 
[[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: $vgpr0 = COPY [[SELECT3]](s32) ; GFX9-LABEL: name: test_udiv_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UDIV]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP 
intpred(uge), [[COPY]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD1]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: $vgpr0 = COPY [[SELECT3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UDIV %0, %1 @@ -41,27 +116,171 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[UV]], [[UV2]] - ; GFX6: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UDIV]](s32), [[UDIV1]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: 
[[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; 
GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] + ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD3]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_udiv_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[UV]], [[UV2]] - ; GFX8: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UDIV]](s32), [[UDIV1]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = 
G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX8: 
[[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] + ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD3]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_udiv_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[UV]], [[UV2]] - ; GFX9: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UDIV]](s32), [[UDIV1]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX9: 
[[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C2]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[UMULH2]], [[ADD1]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C3]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD 
[[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] + ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C2]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C2]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[UMULH5]], [[ADD3]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -149,10 +368,35 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX6: $vgpr0 = COPY [[AND2]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: 
[[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX6: $vgpr0 = COPY [[AND3]](s32) ; GFX8-LABEL: name: test_udiv_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -161,10 +405,35 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND2]](s32) + ; GFX8: 
[[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX8: $vgpr0 = COPY [[AND3]](s32) ; GFX9-LABEL: name: test_udiv_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; 
GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -173,10 +442,35 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND2]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; 
GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX9: $vgpr0 = COPY [[AND3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -205,18 +499,66 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + 
; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX6: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[AND2]], [[AND3]] - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UDIV1]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH 
[[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] + ; GFX6: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[UMULH5]], [[ADD3]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_udiv_v2s16 @@ -232,18 +574,66 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; 
GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX8: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[AND2]], [[AND3]] - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY 
[[UDIV1]](s32) - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] + ; GFX8: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[UMULH5]], [[ADD3]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY 
[[SELECT3]](s32) + ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) + ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_udiv_v2s16 @@ -259,14 +649,62 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), 
[[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C4]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C4]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9: [[UDIV1:%[0-9]+]]:_(s32) = G_UDIV [[AND2]], [[AND3]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UDIV1]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C5]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), 
[[ADD2]], [[SUB5]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] + ; GFX9: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UMULH5]], [[C4]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[UMULH5]], [[C4]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[UMULH5]], [[ADD3]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[SUB7]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -289,8 +727,33 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), 
[[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_udiv_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -300,8 +763,33 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], 
[[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_udiv_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -311,8 +799,33 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; 
GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -337,8 +850,33 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV 
[[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_udiv_s17 ; GFX8: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -348,8 +886,33 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT 
[[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_udiv_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -359,8 +922,33 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UDIV]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: 
[[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UMULH2]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[UMULH2]], [[C3]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[UMULH2]], [[ADD1]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[SUB3]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -380,36 +968,36 @@ ; GFX6-LABEL: name: test_udiv_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[UDIV]](s64) - ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) + ; GFX6: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX8-LABEL: name: test_udiv_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX8: 
[[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[UDIV]](s64) - ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) + ; GFX8: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_udiv_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[UDIV]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) + ; GFX9: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[ZEXT]], [[ZEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UDIV]](s64) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir =================================================================== --- 
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-urem.mir @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX6 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX8 %s -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX6 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX8 %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -O0 -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s --- name: test_urem_s32 @@ -12,18 +12,90 @@ ; GFX6-LABEL: name: test_urem_s32 ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY [[UREM]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; 
GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: $vgpr0 = COPY [[SELECT3]](s32) ; GFX8-LABEL: name: test_urem_s32 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UREM]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: 
[[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: $vgpr0 = COPY [[SELECT3]](s32) ; GFX9-LABEL: name: test_urem_s32 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UREM]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[COPY1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[COPY1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[COPY1]] + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: 
[[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[COPY]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[COPY1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[COPY1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[COPY1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[COPY1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: $vgpr0 = COPY [[SELECT3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_UREM %0, %1 @@ -41,27 +113,168 @@ ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[UV]], [[UV2]] - ; GFX6: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[UV1]], [[UV3]] - ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UREM]](s32), [[UREM1]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] + ; GFX6: 
[[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] + ; GFX6: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] + ; GFX6: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; 
GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] + ; GFX6: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: test_urem_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[UV]], [[UV2]] - ; GFX8: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[UV1]], [[UV3]] - ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UREM]](s32), [[UREM1]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX8: 
[[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] + ; GFX8: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[UV3]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[UMULH3]](s32), [[C1]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] + ; GFX8: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: test_urem_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) - ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[UV]], [[UV2]] - ; GFX9: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[UV1]], [[UV3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UREM]](s32), [[UREM1]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[UV2]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C:%[0-9]+]]:_(s32) = 
G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[UV2]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[UV2]] + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C1]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[UV]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[UV2]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[UV]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[UV2]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV]](s32), [[MUL1]] + ; GFX9: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[UV2]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[UV2]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[UV3]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C2]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[UV3]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH 
[[FPTOUI1]], [[UV3]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C1]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[UV1]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[UV3]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[UV1]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[UV3]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[UV1]](s32), [[MUL3]] + ; GFX9: [[AND1:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[UV3]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[UV3]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s1), [[SUB7]], [[SUB6]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SELECT3]](s32), [[SELECT7]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 @@ -149,10 +362,34 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX6: $vgpr0 = COPY [[AND2]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) 
+ ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX6: $vgpr0 = COPY [[AND3]](s32) ; GFX8-LABEL: name: test_urem_s16 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -161,10 +398,34 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX8: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX8: $vgpr0 = COPY [[AND2]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: 
[[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX8: $vgpr0 = COPY [[AND3]](s32) ; GFX9-LABEL: name: test_urem_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -173,10 +434,34 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] - ; GFX9: $vgpr0 = COPY [[AND2]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) 
= G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; GFX9: $vgpr0 = COPY [[AND3]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -205,18 +490,65 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = 
G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX6: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX6: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX6: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[AND2]], [[AND3]] - ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) - ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] - ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UREM1]](s32) - ; GFX6: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX6: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX6: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX6: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX6: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] + ; GFX6: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] + ; GFX6: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX6: 
[[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX6: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX6: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX6: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] + ; GFX6: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] + ; GFX6: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] + ; GFX6: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] + ; GFX6: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX6: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]] + ; GFX6: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]] + ; GFX6: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] + ; GFX6: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX6: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX6: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) + ; GFX6: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX6: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] ; GFX6: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX6: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX8-LABEL: name: test_urem_v2s16 @@ -232,18 +564,65 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = 
G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX8: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX8: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX8: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[AND2]], [[AND3]] - ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) - ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND 
[[COPY6]], [[C1]] - ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UREM1]](s32) - ; GFX8: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] - ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C]](s32) - ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SHL]] + ; GFX8: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX8: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX8: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX8: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] + ; GFX8: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] + ; GFX8: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX8: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX8: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX8: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX8: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] + ; GFX8: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] + ; GFX8: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] + ; GFX8: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] + ; GFX8: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX8: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]] + ; GFX8: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]] + ; GFX8: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), 
[[SELECT6]], [[ADD3]] + ; GFX8: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX8: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C1]] + ; GFX8: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) + ; GFX8: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C1]] + ; GFX8: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C]](s32) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[SHL]] ; GFX8: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; GFX8: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) ; GFX9-LABEL: name: test_urem_v2s16 @@ -259,14 +638,61 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] - ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C2]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; 
GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; GFX9: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] + ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C1]] ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; GFX9: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] - ; GFX9: [[UREM1:%[0-9]+]]:_(s32) = G_UREM [[AND2]], [[AND3]] - ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) - ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UREM1]](s32) + ; GFX9: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; GFX9: [[UITOFP1:%[0-9]+]]:_(s32) = G_UITOFP [[AND4]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG1:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP1]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL1:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG1]], [[C4]] + ; GFX9: [[FPTOUI1:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL1]](s32) + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI1]], [[AND4]] + ; GFX9: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI1]], [[AND4]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[MUL2]] + ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH3]](s32), [[C3]] + ; GFX9: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[SUB4]], [[MUL2]] + ; GFX9: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[SELECT4]], [[FPTOUI1]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI1]], [[UMULH4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT 
[[ICMP3]](s1), [[ADD2]], [[SUB5]] + ; GFX9: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[SELECT5]], [[AND3]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UMULH5]], [[AND4]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[AND3]], [[MUL3]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB6]](s32), [[AND4]] + ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND3]](s32), [[MUL3]] + ; GFX9: [[AND5:%[0-9]+]]:_(s1) = G_AND [[ICMP4]], [[ICMP5]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[SUB6]], [[AND4]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SUB6]], [[AND4]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[AND5]](s1), [[SUB7]], [[SUB6]] + ; GFX9: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP5]](s1), [[SELECT6]], [[ADD3]] + ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) + ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SELECT7]](s32) ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY6]](s32), [[COPY7]](s32) ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 @@ -289,8 +715,32 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), 
[[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_urem_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -300,8 +750,32 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX8: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: 
[[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: name: test_urem_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -311,8 +785,32 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; 
GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -337,8 +835,32 @@ ; GFX6: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX6: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX6: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX6: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP 
[[AND1]](s32) + ; GFX6: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX6: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX6: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX6: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX6: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX6: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX6: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX6: $vgpr0 = COPY [[COPY4]](s32) ; GFX8-LABEL: name: test_urem_s17 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -348,8 +870,32 @@ ; GFX8: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX8: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX8: 
[[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX8: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX8: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX8: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX8: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX8: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX8: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX8: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], [[SUB2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX8: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX8: $vgpr0 = COPY [[COPY4]](s32) ; GFX9-LABEL: 
name: test_urem_s17 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 @@ -359,8 +905,32 @@ ; GFX9: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]] ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GFX9: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UREM:%[0-9]+]]:_(s32) = G_UREM [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UREM]](s32) + ; GFX9: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[AND1]](s32) + ; GFX9: [[AMDGPU_RCP_IFLAG:%[0-9]+]]:_(s32) = G_AMDGPU_RCP_IFLAG [[UITOFP]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 0x41F0000000000000 + ; GFX9: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[AMDGPU_RCP_IFLAG]], [[C1]] + ; GFX9: [[FPTOUI:%[0-9]+]]:_(s32) = G_FPTOUI [[FMUL]](s32) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[FPTOUI]], [[AND1]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[FPTOUI]], [[AND1]] + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[MUL]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UMULH]](s32), [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[MUL]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[SELECT]], [[FPTOUI]] + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[FPTOUI]], [[UMULH1]] + ; GFX9: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[SUB1]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[SELECT1]], [[AND]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UMULH2]], [[AND1]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[AND]], [[MUL1]] + ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[SUB2]](s32), [[AND1]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[AND]](s32), [[MUL1]] + ; GFX9: [[AND2:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP2]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[SUB2]], [[AND1]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB2]], [[AND1]] + ; GFX9: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s1), [[SUB3]], 
[[SUB2]] + ; GFX9: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[SELECT2]], [[ADD1]] + ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT3]](s32) ; GFX9: $vgpr0 = COPY [[COPY4]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -380,36 +950,36 @@ ; GFX6-LABEL: name: test_urem_s33 ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX6: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX6: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX6: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] - ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[AND]], [[AND1]] - ; GFX6: [[COPY4:%[0-9]+]]:_(s64) = COPY [[UREM]](s64) - ; GFX6: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX6: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) + ; GFX6: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]] + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX6: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX8-LABEL: name: test_urem_s33 ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX8: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX8: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX8: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] - ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[AND]], [[AND1]] - ; GFX8: [[COPY4:%[0-9]+]]:_(s64) = COPY [[UREM]](s64) - ; GFX8: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX8: 
[[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX8: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) + ; GFX8: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]] + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX8: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) ; GFX9-LABEL: name: test_urem_s33 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 - ; GFX9: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934591 - ; GFX9: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[C]] - ; GFX9: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY3]], [[C]] - ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[AND]], [[AND1]] - ; GFX9: [[COPY4:%[0-9]+]]:_(s64) = COPY [[UREM]](s64) - ; GFX9: $vgpr0_vgpr1 = COPY [[COPY4]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]](s64) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[COPY1]](s64) + ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s33) + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC1]](s33) + ; GFX9: [[UREM:%[0-9]+]]:_(s64) = G_UREM [[ZEXT]], [[ZEXT1]] + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s33) = G_TRUNC [[UREM]](s64) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC2]](s33) + ; GFX9: $vgpr0_vgpr1 = COPY [[ANYEXT]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s33) = G_TRUNC %0 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll @@ -1,169 +1,654 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator < %s | 
FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. define i32 @udiv_i32(i32 %num, i32 %den) { - ; CHECK-LABEL: name: udiv_i32 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[COPY1]] - ; CHECK: $vgpr0 = COPY [[UDIV]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 +; GISEL-LABEL: udiv_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 +; GISEL-NEXT: 
s_and_b64 s[4:5], s[4:5], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v2, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: udiv_i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v1 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v2 +; CGP-NEXT: v_mul_lo_u32 v2, v6, v1 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v6, v1 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_lshr_b64 v[3:4], v[2:3], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: v_cndmask_b32_e32 v3, v2, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v6 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v7, v4 +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v4, v3 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v6, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v6, v2 +; CGP-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v3, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v0 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v3, v2, v1 +; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; CGP-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v2, v4, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] %result = udiv i32 %num, %den ret i32 %result } define <2 x i32> @udiv_v2i32(<2 x i32> 
%num, <2 x i32> %den) { - ; CHECK-LABEL: name: udiv_v2i32 - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 +; GISEL-LABEL: udiv_v2i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, 
v6 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5 +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v7, v4, s[6:7] +; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v10, v5, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: udiv_v2i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v2 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v1 +; CGP-NEXT: v_rcp_f32_e32 v4, v4 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v13, v2 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 +; CGP-NEXT: v_mul_lo_u32 v6, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v9, v14, v3 +; 
CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_lshr_b64 v[8:9], v[4:5], 32 +; CGP-NEXT: v_lshr_b64 v[9:10], v[6:7], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v5, v4, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v7, v6, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v8, v5, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v14 +; CGP-NEXT: v_mul_lo_u32 v9, v7, 0 +; CGP-NEXT: v_mul_hi_u32 v7, v7, v14 +; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v15, v8 +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v16, v9 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v8, v5 +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v9, v7 +; CGP-NEXT: v_lshr_b64 v[4:5], v[4:5], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], v[6:7], 32 +; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v4 +; CGP-NEXT: v_sub_i32_e64 v4, s[6:7], v13, v4 +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v14, v5 +; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v14, v5 +; CGP-NEXT: v_cndmask_b32_e32 v6, v4, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v6, v0 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v9, v5, 0 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v7 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_lshr_b64 v[4:5], v[4:5], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], v[6:7], 32 +; CGP-NEXT: v_mul_lo_u32 v6, v4, v2 +; CGP-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; CGP-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 +; CGP-NEXT: v_mul_lo_u32 v9, v5, v3 +; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v5 +; 
CGP-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v7, s[6:7] +; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v10, s[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; CGP-NEXT: s_setpc_b64 s[30:31] %result = udiv <2 x i32> %num, %den ret <2 x i32> %result } define i32 @udiv_i32_pow2k_denom(i32 %num) { - ; CHECK-LABEL: name: udiv_i32_pow2k_denom - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[C]] - ; CHECK: $vgpr0 = COPY [[UDIV]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 +; CHECK-LABEL: udiv_i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, s6 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; 
CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v5 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] %result = udiv i32 %num, 4096 ret i32 %result } define <2 x i32> @udiv_v2i32_pow2k_denom(<2 x i32> %num) { - ; CHECK-LABEL: name: udiv_v2i32_pow2k_denom - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 +; CHECK-LABEL: udiv_v2i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s8, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s8 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, s8 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, s8 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, s8 +; 
CHECK-NEXT: v_mul_hi_u32 v7, v2, s8 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v5, v6, v9, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v2 +; CHECK-NEXT: v_add_i32_e64 v6, s[6:7], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4 +; CHECK-NEXT: v_add_i32_e64 v4, s[6:7], v2, v5 +; CHECK-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, s8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, s8 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 +; CHECK-NEXT: v_subrev_i32_e32 v9, vcc, 1, v2 +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v7 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v7 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v10 +; CHECK-NEXT: v_cmp_le_u32_e64 s[8:9], s8, v0 +; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[6:7] +; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v8, v2, s[6:7] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] %result = udiv <2 x i32> %num, ret <2 x i32> %result } define i32 @udiv_i32_oddk_denom(i32 %num) { - ; CHECK-LABEL: name: udiv_i32_oddk_denom - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 1235195 - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[C]] - ; CHECK: $vgpr0 = COPY [[UDIV]](s32) - ; CHECK: [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]] - ; CHECK: S_SETPC_B64_return [[COPY2]], implicit $vgpr0 +; CHECK-LABEL: udiv_i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, s6 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 +; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v1 +; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, 1, v1 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 +; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s6, v5 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v1, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] %result = udiv i32 %num, 1235195 ret i32 %result } define <2 x i32> @udiv_v2i32_oddk_denom(<2 x i32> %num) { - ; CHECK-LABEL: name: udiv_v2i32_oddk_denom - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1235195 - ; CHECK: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0, implicit $vgpr1 +; CHECK-LABEL: udiv_v2i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s8, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s8 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, s8 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, s8 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, s8 +; CHECK-NEXT: v_mul_hi_u32 v7, v2, s8 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v5, v6, v9, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v2 +; CHECK-NEXT: v_add_i32_e64 v6, s[6:7], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4 +; CHECK-NEXT: v_add_i32_e64 v4, s[6:7], v2, v5 +; CHECK-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, s8 +; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v3 +; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 1, v3 +; CHECK-NEXT: v_mul_lo_u32 v7, v2, s8 +; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v2 +; CHECK-NEXT: 
v_subrev_i32_e32 v9, vcc, 1, v2 +; CHECK-NEXT: v_sub_i32_e32 v10, vcc, v0, v4 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v7 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v7 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v10 +; CHECK-NEXT: v_cmp_le_u32_e64 s[8:9], s8, v0 +; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v5, v3, s[6:7] +; CHECK-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v8, v2, s[6:7] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] %result = udiv <2 x i32> %num, ret <2 x i32> %result } define i32 @udiv_i32_pow2_shl_denom(i32 %x, i32 %y) { - ; CHECK-LABEL: name: udiv_i32_pow2_shl_denom - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[COPY1]](s32) - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[COPY]], [[SHL]] - ; CHECK: $vgpr0 = COPY [[UDIV]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 +; CHECK-LABEL: udiv_i32_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v1 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], 
v2, v3 +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1 +; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 +; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 +; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v2, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CHECK-NEXT: s_setpc_b64 s[30:31] %shl.y = shl i32 4096, %y %r = udiv i32 %x, %shl.y ret i32 %r } define <2 x i32> @udiv_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { - ; CHECK-LABEL: name: udiv_v2i32_pow2_shl_denom - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[BUILD_VECTOR2]], [[BUILD_VECTOR1]](<2 x s32>) - ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[BUILD_VECTOR]], [[SHL]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 +; GISEL-LABEL: udiv_v2i32_pow2_shl_denom: +; GISEL: 
; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5 +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; 
GISEL-NEXT: v_cndmask_b32_e64 v0, v7, v4, s[6:7] +; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v10, v5, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: udiv_v2i32_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v0 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v1 +; CGP-NEXT: v_lshl_b32_e32 v11, s4, v2 +; CGP-NEXT: v_lshl_b32_e32 v12, s4, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v11 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v11 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v12 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v12 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_rcp_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v4 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v4, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v7, v14, v12 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_lshr_b64 v[6:7], v[2:3], 32 +; CGP-NEXT: v_lshr_b64 v[7:8], v[4:5], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e32 v3, v2, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, v4, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v2, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v6, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v4, v5, v14 +; CGP-NEXT: v_mul_lo_u32 v7, v5, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v5, v14 +; CGP-NEXT: v_add_i32_e64 v6, 
s[6:7], v15, v6 +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v16, v7 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v6, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v7, v5 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_lshr_b64 v[3:4], v[4:5], 32 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v13, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v13, v2 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v14, v3 +; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v14, v3 +; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v2, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v4, 0 +; CGP-NEXT: v_mul_hi_u32 v6, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v1 +; CGP-NEXT: v_mul_lo_u32 v7, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v1 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v7 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_lshr_b64 v[3:4], v[4:5], 32 +; CGP-NEXT: v_mul_lo_u32 v4, v2, v11 +; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2 +; CGP-NEXT: v_subrev_i32_e32 v6, vcc, 1, v2 +; CGP-NEXT: v_mul_lo_u32 v7, v3, v12 +; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v3 +; CGP-NEXT: v_subrev_i32_e32 v9, vcc, 1, v3 +; CGP-NEXT: v_sub_i32_e32 v10, vcc, v0, v4 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v7 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v7 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v12 +; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v2, v5, s[6:7] +; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v3, v8, s[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5] +; CGP-NEXT: s_setpc_b64 s[30:31] %shl.y = shl <2 x i32> , %y %r = udiv <2 x i32> %x, %shl.y ret <2 x i32> %r } define i32 @udiv_i32_24bit(i32 %num, i32 %den) { - ; CHECK-LABEL: name: 
udiv_i32_24bit - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]] - ; CHECK: [[UDIV:%[0-9]+]]:_(s32) = G_UDIV [[AND]], [[AND1]] - ; CHECK: $vgpr0 = COPY [[UDIV]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]] - ; CHECK: S_SETPC_B64_return [[COPY3]], implicit $vgpr0 +; GISEL-LABEL: udiv_i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2 +; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, 1, v2 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v1 +; GISEL-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v2, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: udiv_i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v3, s4, v0 +; CGP-NEXT: v_and_b32_e32 v4, s4, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v3 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v2, v6, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v6 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v6 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v7, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v6, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v6, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v3 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v1, v0, v4 +; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v5, vcc, 1, v0 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v3, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v4 +; CGP-NEXT: s_and_b64 s[4:5], s[4:5], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc +; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and i32 %num, 16777215 %den.mask = and i32 %den, 16777215 %result = udiv i32 %num.mask, %den.mask @@ -171,26 +656,149 @@ } define <2 x i32> @udiv_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { - ; 
CHECK-LABEL: name: udiv_v2i32_24bit - ; CHECK: bb.1 (%ir-block.0): - ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) - ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32) - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16777215 - ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR2]] - ; CHECK: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; CHECK: [[UDIV:%[0-9]+]]:_(<2 x s32>) = G_UDIV [[AND]], [[AND1]] - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UDIV]](<2 x s32>) - ; CHECK: $vgpr0 = COPY [[UV]](s32) - ; CHECK: $vgpr1 = COPY [[UV1]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] - ; CHECK: S_SETPC_B64_return [[COPY5]], implicit $vgpr0, implicit $vgpr1 +; GISEL-LABEL: udiv_v2i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_and_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_and_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 
+; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v4 +; GISEL-NEXT: v_subrev_i32_e32 v8, vcc, 1, v4 +; GISEL-NEXT: v_mul_lo_u32 v9, v5, v3 +; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v5 +; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 1, v5 +; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v0, v6 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v6 +; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v9 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v0, v3 +; GISEL-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v7, v4, s[6:7] +; GISEL-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v10, v5, s[6:7] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: udiv_v2i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v7, s4, v0 +; CGP-NEXT: v_and_b32_e32 v8, s4, v1 +; CGP-NEXT: v_and_b32_e32 v9, s4, v2 +; CGP-NEXT: v_and_b32_e32 v10, s4, v3 +; CGP-NEXT: 
v_cvt_f32_u32_e32 v0, v9 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v9 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v7 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v10 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v8 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v2, v14, v10 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v14, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], v[2:3], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v13 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v13 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v14 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v14 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v15, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v13, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v14, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v14, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; 
CGP-NEXT: v_mul_lo_u32 v0, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v3, v2, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v2, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v8 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v3 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v2, v0, v9 +; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0 +; CGP-NEXT: v_subrev_i32_e32 v4, vcc, 1, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v1, v10 +; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v1 +; CGP-NEXT: v_subrev_i32_e32 v11, vcc, 1, v1 +; CGP-NEXT: v_sub_i32_e32 v12, vcc, v7, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v8, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v10 +; CGP-NEXT: s_and_b64 s[6:7], s[6:7], vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v0, v3, s[6:7] +; CGP-NEXT: s_and_b64 s[6:7], s[8:9], s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[6:7] +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] +; CGP-NEXT: s_setpc_b64 s[30:31] %num.mask = and <2 x i32> %num, %den.mask = and <2 x i32> %den, %result = udiv <2 x i32> %num.mask, %den.mask Index: llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll @@ -0,0 +1,806 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck -check-prefixes=CHECK,GISEL %s +; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdhsa < %s | FileCheck 
-check-prefixes=CHECK,CGP %s + +; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare. + +define i32 @urem_i32(i32 %num, i32 %den) { +; GISEL-LABEL: urem_i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: urem_i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v1 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v2 +; CGP-NEXT: v_mul_lo_u32 v2, v6, v1 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v6, v1 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CGP-NEXT: v_lshr_b64 v[3:4], v[2:3], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CGP-NEXT: 
v_cndmask_b32_e32 v3, v2, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v3, v6 +; CGP-NEXT: v_mul_lo_u32 v4, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v6 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v7, v4 +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v4, v3 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v3, s[4:5], v6, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v6, v2 +; CGP-NEXT: v_cndmask_b32_e32 v3, v2, v3, vcc +; CGP-NEXT: v_mul_lo_u32 v2, v3, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v0 +; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v2, v2, v1 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = urem i32 %num, %den + ret i32 %result +} + +define <2 x i32> @urem_v2i32(<2 x i32> %num, <2 x i32> %den) { +; GISEL-LABEL: urem_v2i32: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; 
GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 +; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 +; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: urem_v2i32: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v2 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v0 +; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3 +; CGP-NEXT: v_mul_lo_u32 v7, 0, v3 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v1 +; CGP-NEXT: v_rcp_f32_e32 v4, v4 +; CGP-NEXT: v_rcp_f32_e32 v6, v6 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CGP-NEXT: v_mul_f32_e32 v6, 0x4f800000, v6 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v6 +; CGP-NEXT: 
v_mul_lo_u32 v4, v13, v2 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v8, v13, v2 +; CGP-NEXT: v_mul_lo_u32 v6, v14, v3 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v9, v14, v3 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v4 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9 +; CGP-NEXT: v_lshr_b64 v[8:9], v[4:5], 32 +; CGP-NEXT: v_lshr_b64 v[9:10], v[6:7], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 +; CGP-NEXT: v_cndmask_b32_e32 v5, v4, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; CGP-NEXT: v_cndmask_b32_e64 v7, v6, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v8, v5, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v5, v13 +; CGP-NEXT: v_mul_lo_u32 v6, v7, v14 +; CGP-NEXT: v_mul_lo_u32 v9, v7, 0 +; CGP-NEXT: v_mul_hi_u32 v7, v7, v14 +; CGP-NEXT: v_add_i32_e64 v8, s[6:7], v15, v8 +; CGP-NEXT: v_add_i32_e64 v9, s[6:7], v16, v9 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v8, v5 +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v9, v7 +; CGP-NEXT: v_lshr_b64 v[4:5], v[4:5], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], v[6:7], 32 +; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v4 +; CGP-NEXT: v_sub_i32_e64 v4, s[6:7], v13, v4 +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v14, v5 +; CGP-NEXT: v_sub_i32_e64 v5, s[6:7], v14, v5 +; CGP-NEXT: v_cndmask_b32_e32 v6, v4, v6, vcc +; CGP-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v4, v6, v0 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v8, v6, v0 +; CGP-NEXT: v_mul_lo_u32 v6, v5, v1 +; CGP-NEXT: v_mul_lo_u32 v9, v5, 0 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v1 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v7 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v12, v9 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; CGP-NEXT: v_lshr_b64 v[4:5], v[4:5], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], 
v[6:7], 32 +; CGP-NEXT: v_mul_lo_u32 v4, v4, v2 +; CGP-NEXT: v_mul_lo_u32 v5, v5, v3 +; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 +; CGP-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 +; CGP-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 +; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; CGP-NEXT: s_setpc_b64 s[30:31] + %result = urem <2 x i32> %num, %den + ret <2 x i32> %result +} + +define i32 @urem_i32_pow2k_denom(i32 %num) { +; CHECK-LABEL: urem_i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s6, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, s6 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, s6 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], s6, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; CHECK-NEXT: v_subrev_i32_e64 v0, s[6:7], s6, v2 +; 
CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem i32 %num, 4096 + ret i32 %result +} + +define <2 x i32> @urem_v2i32_pow2k_denom(<2 x i32> %num) { +; CHECK-LABEL: urem_v2i32_pow2k_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_movk_i32 s10, 0x1000 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s10 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, s10 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, s10 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, s10 +; CHECK-NEXT: v_mul_hi_u32 v7, v2, s10 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v5, v6, v9, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v2 +; CHECK-NEXT: v_add_i32_e64 v6, s[6:7], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4 +; CHECK-NEXT: v_add_i32_e64 v4, s[6:7], v2, v5 +; CHECK-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, s10 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, s10 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v1, v2 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s10, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], s10, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_subrev_i32_e64 v0, s[6:7], s10, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v5 
+; CHECK-NEXT: v_add_i32_e64 v3, s[8:9], s10, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v2 +; CHECK-NEXT: v_subrev_i32_e64 v1, s[10:11], s10, v5 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9] +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @urem_i32_oddk_denom(i32 %num) { +; CHECK-LABEL: urem_i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s6, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s6 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_f32_e32 v1, 0x4f800000, v1 +; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1 +; CHECK-NEXT: v_mul_lo_u32 v2, v1, s6 +; CHECK-NEXT: v_mul_hi_u32 v3, v1, s6 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], v1, v2 +; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v2 +; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; CHECK-NEXT: v_mul_hi_u32 v1, v1, v0 +; CHECK-NEXT: v_mul_lo_u32 v1, v1, s6 +; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v0, v1 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s6, v2 +; CHECK-NEXT: v_add_i32_e64 v3, s[4:5], s6, v2 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1 +; CHECK-NEXT: v_subrev_i32_e64 v0, s[6:7], s6, v2 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v3, v0, s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem i32 %num, 1235195 + ret i32 %result +} + +define <2 x i32> @urem_v2i32_oddk_denom(<2 x i32> %num) { +; CHECK-LABEL: urem_v2i32_oddk_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; CHECK-NEXT: s_mov_b32 s10, 0x12d8fb +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s10 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f800000, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v4, v3, s10 +; CHECK-NEXT: v_mul_hi_u32 v5, v3, s10 +; CHECK-NEXT: v_mul_lo_u32 v6, v2, s10 +; CHECK-NEXT: v_mul_hi_u32 v7, v2, s10 +; CHECK-NEXT: v_sub_i32_e32 v8, vcc, 0, v4 +; CHECK-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CHECK-NEXT: v_cndmask_b32_e64 v5, v6, v9, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v4, v4, v3 +; CHECK-NEXT: v_mul_hi_u32 v5, v5, v2 +; CHECK-NEXT: v_add_i32_e64 v6, s[6:7], v3, v4 +; CHECK-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4 +; CHECK-NEXT: v_add_i32_e64 v4, s[6:7], v2, v5 +; CHECK-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v5 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v6, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5] +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v0 +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v1 +; CHECK-NEXT: v_mul_lo_u32 v3, v3, s10 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, s10 +; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v0, v3 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v1, v2 +; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s10, v4 +; CHECK-NEXT: v_add_i32_e64 v6, s[4:5], s10, v4 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3 +; CHECK-NEXT: v_subrev_i32_e64 v0, s[6:7], s10, v4 +; CHECK-NEXT: v_cmp_le_u32_e64 s[6:7], s10, v5 +; CHECK-NEXT: v_add_i32_e64 v3, s[8:9], s10, v5 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v2 +; CHECK-NEXT: v_subrev_i32_e64 v1, s[10:11], s10, v5 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CHECK-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v6, v0, 
s[4:5] +; CHECK-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[8:9] +; CHECK-NEXT: s_setpc_b64 s[30:31] + %result = urem <2 x i32> %num, + ret <2 x i32> %result +} + +define i32 @urem_i32_pow2_shl_denom(i32 %x, i32 %y) { +; CHECK-LABEL: urem_i32_pow2_shl_denom: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1 +; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v1 +; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2 +; CHECK-NEXT: v_mul_lo_u32 v3, v2, v1 +; CHECK-NEXT: v_mul_hi_u32 v4, v2, v1 +; CHECK-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; CHECK-NEXT: v_mul_hi_u32 v2, v2, v0 +; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1 +; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 +; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; CHECK-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 +; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; CHECK-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 +; CHECK-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; CHECK-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CHECK-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl i32 4096, %y + %r = urem i32 %x, %shl.y + ret i32 %r +} + +define <2 x i32> @urem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) { +; GISEL-LABEL: urem_v2i32_pow2_shl_denom: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_movk_i32 s4, 0x1000 +; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: 
v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 +; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 +; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: urem_v2i32_pow2_shl_denom: +; CGP: ; %bb.0: +; CGP-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_movk_i32 s4, 0x1000 +; CGP-NEXT: v_mul_lo_u32 v9, 0, v0 +; CGP-NEXT: v_mul_lo_u32 v10, 0, v1 +; CGP-NEXT: v_lshl_b32_e32 v11, s4, v2 +; CGP-NEXT: v_lshl_b32_e32 v12, s4, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v11 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v11 +; CGP-NEXT: v_cvt_f32_u32_e32 v4, v12 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v12 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; CGP-NEXT: v_rcp_f32_e32 v4, v4 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v4 +; CGP-NEXT: v_mul_lo_u32 v2, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v6, v13, v11 +; CGP-NEXT: v_mul_lo_u32 v4, v14, v12 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v7, v14, v12 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; CGP-NEXT: v_lshr_b64 v[6:7], v[2:3], 32 +; CGP-NEXT: v_lshr_b64 v[7:8], v[4:5], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 +; CGP-NEXT: v_cndmask_b32_e32 v3, v2, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v7 +; CGP-NEXT: v_cndmask_b32_e64 v5, v4, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v2, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v6, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v13 +; CGP-NEXT: v_mul_lo_u32 v4, v5, v14 +; CGP-NEXT: v_mul_lo_u32 v7, v5, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v5, v14 +; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v15, v6 +; CGP-NEXT: v_add_i32_e64 v7, s[6:7], v16, v7 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v6, v3 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v7, v5 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_lshr_b64 v[3:4], v[4:5], 32 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v13, v2 +; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v13, v2 +; CGP-NEXT: v_add_i32_e64 
v5, s[6:7], v14, v3 +; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v14, v3 +; CGP-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc +; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v5, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v2, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v5, v4, 0 +; CGP-NEXT: v_mul_hi_u32 v6, v4, v0 +; CGP-NEXT: v_mul_lo_u32 v4, v3, v1 +; CGP-NEXT: v_mul_lo_u32 v7, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v8, v3, v1 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v9, v5 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v7 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; CGP-NEXT: v_lshr_b64 v[2:3], v[2:3], 32 +; CGP-NEXT: v_lshr_b64 v[3:4], v[4:5], 32 +; CGP-NEXT: v_mul_lo_u32 v2, v2, v11 +; CGP-NEXT: v_mul_lo_u32 v3, v3, v12 +; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 +; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v11 +; CGP-NEXT: v_add_i32_e64 v6, s[4:5], v4, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v4, v11 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v5, v12 +; CGP-NEXT: v_add_i32_e64 v2, s[8:9], v5, v12 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v3 +; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v5, v12 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc +; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CGP-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v6, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; CGP-NEXT: s_setpc_b64 s[30:31] + %shl.y = shl <2 x i32> , %y + %r = urem <2 x i32> %x, %shl.y + ret <2 x i32> %r +} + +define i32 @urem_i32_24bit(i32 %num, i32 %den) { +; GISEL-LABEL: urem_i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; GISEL-NEXT: 
v_cvt_u32_f32_e32 v2, v2 +; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1 +; GISEL-NEXT: v_mul_hi_u32 v4, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GISEL-NEXT: v_mul_hi_u32 v3, v3, v2 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v2, v3 +; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3 +; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GISEL-NEXT: v_mul_hi_u32 v2, v2, v0 +; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1 +; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v2 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1 +; GISEL-NEXT: v_add_i32_e64 v4, s[4:5], v3, v1 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v3, v1 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: urem_i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v3, s4, v0 +; CGP-NEXT: v_and_b32_e32 v4, s4, v1 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v4 +; CGP-NEXT: v_mul_lo_u32 v5, 0, v3 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v6, v0 +; CGP-NEXT: v_mul_lo_u32 v0, v6, v4 +; CGP-NEXT: v_mul_lo_u32 v7, v6, 0 +; CGP-NEXT: v_mul_hi_u32 v2, v6, v4 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v7 +; CGP-NEXT: v_sub_i32_e32 v8, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2 +; CGP-NEXT: v_lshr_b64 v[1:2], v[0:1], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v8, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v6 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v6 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v7, v2 +; CGP-NEXT: v_add_i32_e64 v1, s[4:5], v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_add_i32_e64 v1, 
s[4:5], v6, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v6, v0 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc +; CGP-NEXT: v_mul_lo_u32 v0, v1, v3 +; CGP-NEXT: v_mul_lo_u32 v2, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v3 +; CGP-NEXT: v_add_i32_e32 v2, vcc, v5, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v4 +; CGP-NEXT: v_sub_i32_e32 v1, vcc, v3, v0 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4 +; CGP-NEXT: v_add_i32_e64 v2, s[4:5], v1, v4 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v1, v4 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and i32 %num, 16777215 + %den.mask = and i32 %den, 16777215 + %result = urem i32 %num.mask, %den.mask + ret i32 %result +} + +define <2 x i32> @urem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) { +; GISEL-LABEL: urem_v2i32_24bit: +; GISEL: ; %bb.0: +; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_mov_b32 s4, 0xffffff +; GISEL-NEXT: v_and_b32_e32 v0, s4, v0 +; GISEL-NEXT: v_and_b32_e32 v1, s4, v1 +; GISEL-NEXT: v_and_b32_e32 v2, s4, v2 +; GISEL-NEXT: v_and_b32_e32 v3, s4, v3 +; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v2 +; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v3 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4 +; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f800000, v4 +; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f800000, v5 +; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4 +; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GISEL-NEXT: v_mul_lo_u32 v6, v4, v2 +; GISEL-NEXT: v_mul_hi_u32 v7, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v8, v5, v3 +; GISEL-NEXT: v_mul_hi_u32 v9, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v6 +; GISEL-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 +; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc +; GISEL-NEXT: 
v_cmp_eq_u32_e64 s[4:5], 0, v9 +; GISEL-NEXT: v_cndmask_b32_e64 v7, v8, v11, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v6, v6, v4 +; GISEL-NEXT: v_mul_hi_u32 v7, v7, v5 +; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v4, v6 +; GISEL-NEXT: v_sub_i32_e64 v4, s[6:7], v4, v6 +; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v5, v7 +; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], v5, v7 +; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v6, s[4:5] +; GISEL-NEXT: v_mul_hi_u32 v4, v4, v0 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v1 +; GISEL-NEXT: v_mul_lo_u32 v4, v4, v2 +; GISEL-NEXT: v_mul_lo_u32 v5, v5, v3 +; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v4 +; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v5 +; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v2 +; GISEL-NEXT: v_add_i32_e64 v8, s[4:5], v6, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4 +; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v6, v2 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 +; GISEL-NEXT: v_add_i32_e64 v2, s[8:9], v7, v3 +; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v1, v5 +; GISEL-NEXT: v_sub_i32_e64 v1, s[10:11], v7, v3 +; GISEL-NEXT: s_and_b64 vcc, vcc, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc +; GISEL-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc +; GISEL-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] +; GISEL-NEXT: v_cndmask_b32_e64 v1, v2, v1, s[8:9] +; GISEL-NEXT: s_setpc_b64 s[30:31] +; +; CGP-LABEL: urem_v2i32_24bit: +; CGP: ; %bb.0: +; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CGP-NEXT: s_mov_b32 s4, 0xffffff +; CGP-NEXT: v_and_b32_e32 v7, s4, v0 +; CGP-NEXT: v_and_b32_e32 v8, s4, v1 +; CGP-NEXT: v_and_b32_e32 v9, s4, v2 +; CGP-NEXT: v_and_b32_e32 v10, s4, v3 +; CGP-NEXT: v_cvt_f32_u32_e32 v0, v9 +; CGP-NEXT: v_mul_lo_u32 v1, 0, v9 +; CGP-NEXT: v_mul_lo_u32 v11, 0, v7 +; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10 +; CGP-NEXT: v_mul_lo_u32 v3, 0, v10 +; CGP-NEXT: v_mul_lo_u32 v12, 0, v8 +; CGP-NEXT: v_rcp_f32_e32 v0, v0 +; CGP-NEXT: v_rcp_f32_e32 v2, v2 +; 
CGP-NEXT: v_mul_f32_e32 v0, 0x4f800000, v0 +; CGP-NEXT: v_mul_f32_e32 v2, 0x4f800000, v2 +; CGP-NEXT: v_cvt_u32_f32_e32 v13, v0 +; CGP-NEXT: v_cvt_u32_f32_e32 v14, v2 +; CGP-NEXT: v_mul_lo_u32 v0, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v15, v13, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v13, v9 +; CGP-NEXT: v_mul_lo_u32 v2, v14, v10 +; CGP-NEXT: v_mul_lo_u32 v16, v14, 0 +; CGP-NEXT: v_mul_hi_u32 v5, v14, v10 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v15 +; CGP-NEXT: v_sub_i32_e32 v17, vcc, 0, v0 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v16 +; CGP-NEXT: v_sub_i32_e32 v18, vcc, 0, v2 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v5 +; CGP-NEXT: v_lshr_b64 v[4:5], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[5:6], v[2:3], 32 +; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 +; CGP-NEXT: v_cndmask_b32_e32 v1, v0, v17, vcc +; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v5 +; CGP-NEXT: v_cndmask_b32_e64 v3, v2, v18, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v1, v13 +; CGP-NEXT: v_mul_lo_u32 v4, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v1, v1, v13 +; CGP-NEXT: v_mul_lo_u32 v2, v3, v14 +; CGP-NEXT: v_mul_lo_u32 v5, v3, 0 +; CGP-NEXT: v_mul_hi_u32 v3, v3, v14 +; CGP-NEXT: v_add_i32_e64 v4, s[6:7], v15, v4 +; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5 +; CGP-NEXT: v_add_i32_e64 v1, s[6:7], v4, v1 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v5, v3 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_add_i32_e64 v2, s[6:7], v13, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v13, v0 +; CGP-NEXT: v_add_i32_e64 v3, s[6:7], v14, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[6:7], v14, v1 +; CGP-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc +; CGP-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] +; CGP-NEXT: v_mul_lo_u32 v0, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v3, v2, 0 +; CGP-NEXT: v_mul_hi_u32 v4, v2, v7 +; CGP-NEXT: v_mul_lo_u32 v2, v1, v8 +; CGP-NEXT: v_mul_lo_u32 v5, v1, 0 +; CGP-NEXT: v_mul_hi_u32 v6, v1, v8 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v3 +; CGP-NEXT: v_add_i32_e32 
v3, vcc, v12, v5 +; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4 +; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v6 +; CGP-NEXT: v_lshr_b64 v[0:1], v[0:1], 32 +; CGP-NEXT: v_lshr_b64 v[1:2], v[2:3], 32 +; CGP-NEXT: v_mul_lo_u32 v0, v0, v9 +; CGP-NEXT: v_mul_lo_u32 v1, v1, v10 +; CGP-NEXT: v_sub_i32_e32 v2, vcc, v7, v0 +; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v1 +; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9 +; CGP-NEXT: v_add_i32_e64 v4, s[4:5], v2, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v0 +; CGP-NEXT: v_sub_i32_e64 v0, s[6:7], v2, v9 +; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v10 +; CGP-NEXT: v_add_i32_e64 v5, s[8:9], v3, v10 +; CGP-NEXT: v_cmp_ge_u32_e64 s[8:9], v8, v1 +; CGP-NEXT: v_sub_i32_e64 v1, s[10:11], v3, v10 +; CGP-NEXT: s_and_b64 vcc, vcc, s[4:5] +; CGP-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; CGP-NEXT: s_and_b64 vcc, s[6:7], s[8:9] +; CGP-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc +; CGP-NEXT: v_cndmask_b32_e64 v0, v4, v0, s[4:5] +; CGP-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[8:9] +; CGP-NEXT: s_setpc_b64 s[30:31] + %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215> + %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215> + %result = urem <2 x i32> %num.mask, %den.mask + ret <2 x i32> %result +}