Index: include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -163,6 +163,17 @@
                    LLT PartTy, ArrayRef<unsigned> PartRegs,
                    LLT LeftoverTy = LLT(), ArrayRef<unsigned> LeftoverRegs = {});
 
+  /// Perform generic multiplication of values held in multiple registers.
+  /// Generated instructions use only types NarrowTy and i1.
+  /// The destination can be the same size as the source or twice its size.
+  void MultiplyRegisters(SmallVectorImpl<unsigned> &DstRegs,
+                         ArrayRef<unsigned> Src1Regs,
+                         ArrayRef<unsigned> Src2Regs, LLT NarrowTy);
+
+  /// Add Src1 and Src2 and return the carry, zero-extended to NarrowTy.
+  unsigned AddAndGetCarry(unsigned Dest, unsigned Src1, unsigned Src2,
+                          LLT NarrowTy);
+
   LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI,
                                                 unsigned TypeIdx, LLT NarrowTy);
 
@@ -199,7 +210,7 @@
                                          LLT HalfTy, LLT ShiftAmtTy);
 
   LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
-  LegalizeResult narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
   LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -535,7 +535,7 @@
     return Legalized;
   }
   case TargetOpcode::G_MUL:
-    return narrowScalarMul(MI, TypeIdx, NarrowTy);
+    return narrowScalarMul(MI, NarrowTy);
   case TargetOpcode::G_EXTRACT:
     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
   case TargetOpcode::G_INSERT:
@@ -2544,52 +2544,110 @@
   }
 }
 
+unsigned LegalizerHelper::AddAndGetCarry(unsigned Dest, unsigned Src1,
+                                         unsigned Src2, LLT NarrowTy) {
+  unsigned Carry = MRI.createGenericVirtualRegister(LLT::scalar(1));
+  MIRBuilder.buildInstr(TargetOpcode::G_UADDO, {Dest, Carry}, {Src1, Src2});
+  unsigned CarryZext = MRI.createGenericVirtualRegister(NarrowTy);
+  MIRBuilder.buildZExt(CarryZext, Carry);
+  return CarryZext;
+}
+
+void LegalizerHelper::MultiplyRegisters(SmallVectorImpl<unsigned> &DstRegs,
+                                        ArrayRef<unsigned> Src1Regs,
+                                        ArrayRef<unsigned> Src2Regs,
+                                        LLT NarrowTy) {
+  unsigned SrcParts = Src1Regs.size();
+  unsigned DstParts = DstRegs.size();
+
+  unsigned DstIdx = 0; // Low bits of the result.
+  unsigned FactorSum = MRI.createGenericVirtualRegister(NarrowTy);
+  MIRBuilder.buildMul(FactorSum, Src1Regs[DstIdx], Src2Regs[DstIdx]);
+  DstRegs[DstIdx] = FactorSum;
+
+  unsigned CarrySumPrevDstIdx;
+  SmallVector<unsigned, 8> Factors;
+
+  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
+    // Collect low parts of muls for DstIdx.
+    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
+         i <= std::min(DstIdx, SrcParts - 1); ++i) {
+      unsigned CurrMul = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildMul(CurrMul, Src1Regs[DstIdx - i], Src2Regs[i]);
+      Factors.push_back(CurrMul);
+    }
+    // Collect high parts of muls from previous DstIdx.
+    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+      unsigned CurrMul = MRI.createGenericVirtualRegister(NarrowTy);
+      MIRBuilder.buildUMulH(CurrMul, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+      Factors.push_back(CurrMul);
+    }
+    // Add the CarrySum from the additions calculated for the previous DstIdx.
+    if (DstIdx != 1) {
+      Factors.push_back(CarrySumPrevDstIdx);
+    }
+
+    FactorSum = MRI.createGenericVirtualRegister(NarrowTy);
+    unsigned CarrySum;
+    // Add all factors and accumulate all carries into CarrySum.
+    if (DstIdx != DstParts - 1) {
+      CarrySum = AddAndGetCarry(FactorSum, Factors[0], Factors[1], NarrowTy);
+      for (unsigned i = 2; i < Factors.size(); ++i) {
+        unsigned FactorSumPrev = FactorSum;
+        FactorSum = MRI.createGenericVirtualRegister(NarrowTy);
+        unsigned CarryZext =
+            AddAndGetCarry(FactorSum, FactorSumPrev, Factors[i], NarrowTy);
+        unsigned CarrySumPrev = CarrySum;
+        CarrySum = MRI.createGenericVirtualRegister(NarrowTy);
+        MIRBuilder.buildAdd(CarrySum, CarrySumPrev, CarryZext);
+      }
+    } else {
+      // Since the value for the next index is not calculated, neither is
+      // CarrySum.
+      MIRBuilder.buildAdd(FactorSum, Factors[0], Factors[1]);
+      for (unsigned i = 2; i < Factors.size(); ++i) {
+        unsigned FactorSumPrev = FactorSum;
+        FactorSum = MRI.createGenericVirtualRegister(NarrowTy);
+        MIRBuilder.buildAdd(FactorSum, FactorSumPrev, Factors[i]);
+      }
+    }
+
+    CarrySumPrevDstIdx = CarrySum;
+    DstRegs[DstIdx] = FactorSum;
+    Factors.clear();
+  }
+}
+
 LegalizerHelper::LegalizeResult
-LegalizerHelper::narrowScalarMul(MachineInstr &MI, unsigned TypeIdx, LLT NewTy) {
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
   unsigned DstReg = MI.getOperand(0).getReg();
-  unsigned Src0 = MI.getOperand(1).getReg();
-  unsigned Src1 = MI.getOperand(2).getReg();
+  unsigned Src1 = MI.getOperand(1).getReg();
+  unsigned Src2 = MI.getOperand(2).getReg();
+
   LLT Ty = MRI.getType(DstReg);
   if (Ty.isVector())
     return UnableToLegalize;
 
-  unsigned Size = Ty.getSizeInBits();
-  unsigned NewSize = Size / 2;
-  if (Size != 2 * NewSize)
+  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
+  unsigned DstSize = Ty.getSizeInBits();
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
     return UnableToLegalize;
 
-  LLT HalfTy = LLT::scalar(NewSize);
-  // TODO: if HalfTy != NewTy, handle the breakdown all at once?
+  unsigned NumDstParts = DstSize / NarrowSize;
+  unsigned NumSrcParts = SrcSize / NarrowSize;
 
-  unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
-  unsigned Lo = MRI.createGenericVirtualRegister(HalfTy);
-  unsigned Hi = MRI.createGenericVirtualRegister(HalfTy);
-  unsigned ExtLo = MRI.createGenericVirtualRegister(Ty);
-  unsigned ExtHi = MRI.createGenericVirtualRegister(Ty);
-  unsigned ShiftedHi = MRI.createGenericVirtualRegister(Ty);
+  SmallVector<unsigned, 2> Src1Parts, Src2Parts, DstRegs;
+  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
+  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+  DstRegs.resize(NumDstParts);
+  MultiplyRegisters(DstRegs, Src1Parts, Src2Parts, NarrowTy);
 
-  SmallVector<unsigned, 2> Src0Parts;
-  SmallVector<unsigned, 2> Src1Parts;
-
-  extractParts(Src0, HalfTy, 2, Src0Parts);
-  extractParts(Src1, HalfTy, 2, Src1Parts);
-
-  MIRBuilder.buildMul(Lo, Src0Parts[0], Src1Parts[0]);
-
-  // TODO: Use smulh or umulh depending on what the target has.
-  MIRBuilder.buildUMulH(Hi, Src0Parts[1], Src1Parts[1]);
-
-  MIRBuilder.buildConstant(ShiftAmt, NewSize);
-  MIRBuilder.buildAnyExt(ExtHi, Hi);
-  MIRBuilder.buildShl(ShiftedHi, ExtHi, ShiftAmt);
-
-  MIRBuilder.buildZExt(ExtLo, Lo);
-  MIRBuilder.buildOr(DstReg, ExtLo, ShiftedHi);
+  MIRBuilder.buildMerge(DstReg, DstRegs);
 
   MI.eraseFromParent();
   return Legalized;
 }
-
 LegalizerHelper::LegalizeResult
 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
Index: lib/Target/Mips/MipsLegalizerInfo.cpp
===================================================================
--- lib/Target/Mips/MipsLegalizerInfo.cpp
+++ lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -24,14 +24,10 @@
   const LLT s64 = LLT::scalar(64);
   const LLT p0 = LLT::pointer(0, 32);
 
-  getActionDefinitionsBuilder({G_ADD, G_SUB})
+  getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
       .legalFor({s32})
       .clampScalar(0, s32, s32);
 
-  getActionDefinitionsBuilder(G_MUL)
-      .legalFor({s32})
-      .minScalar(0, s32);
-
   getActionDefinitionsBuilder({G_UADDO, G_UADDE, G_USUBO, G_USUBE, G_UMULO})
       .lowerFor({{s32, s1}});
 
Index: test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
===================================================================
--- test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
+++ test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir
@@ -51,14 +51,13 @@
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64)
     ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64)
     ; CHECK: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV2]]
-    ; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]]
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UMULH]](s32)
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[TRUNC]](s32)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[MUL]](s32)
-    ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]]
-    ; CHECK: $vgpr0_vgpr1 = COPY [[OR]](s64)
+    ; CHECK: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV2]]
+    ; CHECK: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]]
+    ; CHECK: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV2]]
+    ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]]
+    ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]]
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[MUL]](s32), [[ADD1]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64)
    %0:_(s64) = COPY $vgpr0_vgpr1
    %1:_(s64) = COPY $vgpr2_vgpr3
    %2:_(s64) = G_MUL %0, %1
Index: test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
===================================================================
--- test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
+++ test/CodeGen/Mips/GlobalISel/legalizer/mul.mir
@@ -9,6 +9,8 @@
   define void @mul_i16_sext() {entry: ret void}
   define void @mul_i16_zext() {entry: ret void}
   define void @mul_i16_aext() {entry: ret void}
+  define void @mul_i64() {entry: ret void}
+  define void @mul_i128() {entry: ret void}
   define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) { ret void }
 
 ...
@@ -211,6 +213,160 @@
     $v0 = COPY %5(s32)
     RetRA implicit $v0
 
+...
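The mul_i64 and mul_i128 MIR tests added below exercise the two-limb and four-limb paths of MultiplyRegisters. As a sanity check of the limb algorithm itself, here is a small host-side C++ sketch (not part of the patch; mulParts is an illustrative name) that mirrors the instruction sequence the helper emits, with 32-bit integers standing in for NarrowTy registers, and verifies the two-limb case against native 64-bit arithmetic:

// Host-side sketch (not LLVM code) of the limb-wise long multiplication
// that MultiplyRegisters emits, with uint32_t limbs in place of NarrowTy
// virtual registers.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint32_t> mulParts(const std::vector<uint32_t> &Src1,
                                      const std::vector<uint32_t> &Src2,
                                      unsigned DstParts) {
  unsigned SrcParts = Src1.size();
  std::vector<uint32_t> Dst(DstParts);
  Dst[0] = Src1[0] * Src2[0];              // low limb: a plain G_MUL
  uint32_t CarrySumPrev = 0;
  for (unsigned DstIdx = 1; DstIdx < DstParts; ++DstIdx) {
    std::vector<uint32_t> Factors;
    // Low halves of the products that land in this limb (G_MUL).
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i)
      Factors.push_back(Src1[DstIdx - i] * Src2[i]);
    // High halves of the products from the previous limb (G_UMULH).
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i)
      Factors.push_back(
          (uint32_t)(((uint64_t)Src1[DstIdx - 1 - i] * Src2[i]) >> 32));
    // Carries accumulated while summing the previous limb.
    if (DstIdx != 1)
      Factors.push_back(CarrySumPrev);
    // Sum the factors and count the carries (G_UADDO + G_ZEXT + G_ADD);
    // the carries of the topmost limb are simply dropped.
    uint32_t FactorSum = Factors[0], CarrySum = 0;
    for (unsigned i = 1; i < Factors.size(); ++i) {
      uint32_t Prev = FactorSum;
      FactorSum += Factors[i];
      CarrySum += FactorSum < Prev; // 1 iff the 32-bit add wrapped
    }
    CarrySumPrev = CarrySum;
    Dst[DstIdx] = FactorSum;
  }
  return Dst;
}

int main() {
  uint64_t A = 0x0123456789abcdefULL, B = 0xfedcba9876543210ULL;
  std::vector<uint32_t> P = mulParts({(uint32_t)A, (uint32_t)(A >> 32)},
                                     {(uint32_t)B, (uint32_t)(B >> 32)}, 2);
  // The two result limbs are exactly the wrap-around 64-bit product.
  assert(((uint64_t)P[1] << 32 | P[0]) == A * B);
  return 0;
}

The loop bounds match the corresponding loops in MultiplyRegisters, so the number of G_MUL, G_UMULH and carry additions in the tests below can be read off directly from this model.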
+--- +name: mul_i64 +alignment: 2 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; MIPS32-LABEL: name: mul_i64 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY]] + ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[COPY3]], [[COPY]] + ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[COPY2]], [[COPY1]] + ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[COPY2]], [[COPY]] + ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32: $v0 = COPY [[MUL]](s32) + ; MIPS32: $v1 = COPY [[ADD1]](s32) + ; MIPS32: RetRA implicit $v0, implicit $v1 + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %2(s32), %3(s32) + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %1:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s64) = G_MUL %1, %0 + %7:_(s32), %8:_(s32) = G_UNMERGE_VALUES %6(s64) + $v0 = COPY %7(s32) + $v1 = COPY %8(s32) + RetRA implicit $v0, implicit $v1 + +... +--- +name: mul_i128 +alignment: 2 +tracksRegLiveness: true +fixedStack: + - { id: 0, offset: 28, size: 4, alignment: 4, stack-id: 0, isImmutable: true } + - { id: 1, offset: 24, size: 4, alignment: 8, stack-id: 0, isImmutable: true } + - { id: 2, offset: 20, size: 4, alignment: 4, stack-id: 0, isImmutable: true } + - { id: 3, offset: 16, size: 4, alignment: 8, stack-id: 0, isImmutable: true } +body: | + bb.1.entry: + liveins: $a0, $a1, $a2, $a3 + + ; MIPS32-LABEL: name: mul_i128 + ; MIPS32: liveins: $a0, $a1, $a2, $a3 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY $a2 + ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY $a3 + ; MIPS32: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 + ; MIPS32: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load 4 from %fixed-stack.0, align 8) + ; MIPS32: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1 + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load 4 from %fixed-stack.1) + ; MIPS32: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2 + ; MIPS32: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load 4 from %fixed-stack.2, align 8) + ; MIPS32: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3 + ; MIPS32: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load 4 from %fixed-stack.3) + ; MIPS32: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY]] + ; MIPS32: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY]] + ; MIPS32: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY1]] + ; MIPS32: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY]] + ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[MUL1]], [[MUL2]] + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD]](s32), [[MUL2]] + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]] + ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[ADD]], [[UMULH]] + ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD1]](s32), [[UMULH]] + ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32) + ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] + ; MIPS32: 
[[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[AND1]] + ; MIPS32: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY]] + ; MIPS32: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY1]] + ; MIPS32: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY2]] + ; MIPS32: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY]] + ; MIPS32: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY1]] + ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[MUL4]] + ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY6:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32) + ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY6]], [[C2]] + ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[MUL5]] + ; MIPS32: [[ICMP3:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD4]](s32), [[MUL5]] + ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY7:%[0-9]+]]:_(s32) = COPY [[ICMP3]](s32) + ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C3]] + ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[AND2]], [[AND3]] + ; MIPS32: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[UMULH1]] + ; MIPS32: [[ICMP4:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD6]](s32), [[UMULH1]] + ; MIPS32: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ICMP4]](s32) + ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C4]] + ; MIPS32: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[ADD5]], [[AND4]] + ; MIPS32: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[ADD6]], [[UMULH2]] + ; MIPS32: [[ICMP5:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD8]](s32), [[UMULH2]] + ; MIPS32: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY9:%[0-9]+]]:_(s32) = COPY [[ICMP5]](s32) + ; MIPS32: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C5]] + ; MIPS32: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[ADD7]], [[AND5]] + ; MIPS32: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[ADD8]], [[ADD2]] + ; MIPS32: [[ICMP6:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD10]](s32), [[ADD2]] + ; MIPS32: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY10:%[0-9]+]]:_(s32) = COPY [[ICMP6]](s32) + ; MIPS32: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY10]], [[C6]] + ; MIPS32: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[ADD9]], [[AND6]] + ; MIPS32: [[MUL6:%[0-9]+]]:_(s32) = G_MUL [[LOAD3]], [[COPY]] + ; MIPS32: [[MUL7:%[0-9]+]]:_(s32) = G_MUL [[LOAD2]], [[COPY1]] + ; MIPS32: [[MUL8:%[0-9]+]]:_(s32) = G_MUL [[LOAD1]], [[COPY2]] + ; MIPS32: [[MUL9:%[0-9]+]]:_(s32) = G_MUL [[LOAD]], [[COPY3]] + ; MIPS32: [[UMULH3:%[0-9]+]]:_(s32) = G_UMULH [[LOAD2]], [[COPY]] + ; MIPS32: [[UMULH4:%[0-9]+]]:_(s32) = G_UMULH [[LOAD1]], [[COPY1]] + ; MIPS32: [[UMULH5:%[0-9]+]]:_(s32) = G_UMULH [[LOAD]], [[COPY2]] + ; MIPS32: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[MUL6]], [[MUL7]] + ; MIPS32: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[ADD12]], [[MUL8]] + ; MIPS32: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[ADD13]], [[MUL9]] + ; MIPS32: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[ADD14]], [[UMULH3]] + ; MIPS32: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[ADD15]], [[UMULH4]] + ; MIPS32: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[ADD16]], [[UMULH5]] + ; MIPS32: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[ADD17]], [[ADD11]] + ; MIPS32: $v0 = COPY [[MUL]](s32) + ; MIPS32: $v1 = COPY [[ADD1]](s32) + ; MIPS32: $a0 = COPY [[ADD10]](s32) + ; MIPS32: $a1 = COPY [[ADD18]](s32) + ; MIPS32: RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + %2:_(s32) = COPY $a0 + %3:_(s32) = COPY $a1 + %4:_(s32) = COPY $a2 + %5:_(s32) = COPY $a3 + %0:_(s128) = G_MERGE_VALUES %2(s32), %3(s32), %4(s32), %5(s32) + 
%10:_(p0) = G_FRAME_INDEX %fixed-stack.3 + %6:_(s32) = G_LOAD %10(p0) :: (load 4 from %fixed-stack.3, align 8) + %11:_(p0) = G_FRAME_INDEX %fixed-stack.2 + %7:_(s32) = G_LOAD %11(p0) :: (load 4 from %fixed-stack.2) + %12:_(p0) = G_FRAME_INDEX %fixed-stack.1 + %8:_(s32) = G_LOAD %12(p0) :: (load 4 from %fixed-stack.1, align 8) + %13:_(p0) = G_FRAME_INDEX %fixed-stack.0 + %9:_(s32) = G_LOAD %13(p0) :: (load 4 from %fixed-stack.0) + %1:_(s128) = G_MERGE_VALUES %6(s32), %7(s32), %8(s32), %9(s32) + %14:_(s128) = G_MUL %1, %0 + %15:_(s32), %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(s128) + $v0 = COPY %15(s32) + $v1 = COPY %16(s32) + $a0 = COPY %17(s32) + $a1 = COPY %18(s32) + RetRA implicit $v0, implicit $v1, implicit $a0, implicit $a1 + ... --- name: umul_with_overflow Index: test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll =================================================================== --- test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -87,6 +87,113 @@ ret i16 %mul } +define i64 @mul_i64(i64 %a, i64 %b) { +; MIPS32-LABEL: mul_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mul $2, $6, $4 +; MIPS32-NEXT: mul $7, $7, $4 +; MIPS32-NEXT: mul $5, $6, $5 +; MIPS32-NEXT: multu $6, $4 +; MIPS32-NEXT: mfhi $4 +; MIPS32-NEXT: addu $5, $7, $5 +; MIPS32-NEXT: addu $3, $5, $4 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %mul = mul i64 %b, %a + ret i64 %mul +} + +define i128 @mul_i128(i128 %a, i128 %b) { +; MIPS32-LABEL: mul_i128: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: addiu $2, $sp, 20 +; MIPS32-NEXT: lw $2, 0($2) +; MIPS32-NEXT: addiu $3, $sp, 24 +; MIPS32-NEXT: lw $3, 0($3) +; MIPS32-NEXT: addiu $8, $sp, 28 +; MIPS32-NEXT: lw $8, 0($8) +; MIPS32-NEXT: mul $9, $1, $4 +; MIPS32-NEXT: mul $10, $2, $4 +; MIPS32-NEXT: mul $11, $1, $5 +; MIPS32-NEXT: multu $1, $4 +; MIPS32-NEXT: mfhi $12 +; MIPS32-NEXT: addu $10, $10, $11 +; MIPS32-NEXT: sltu $11, $10, $11 +; MIPS32-NEXT: lui $13, 0 +; MIPS32-NEXT: ori $13, $13, 1 +; MIPS32-NEXT: and $11, $11, $13 +; MIPS32-NEXT: addu $10, $10, $12 +; MIPS32-NEXT: sltu $12, $10, $12 +; MIPS32-NEXT: lui $13, 0 +; MIPS32-NEXT: ori $13, $13, 1 +; MIPS32-NEXT: and $12, $12, $13 +; MIPS32-NEXT: addu $11, $11, $12 +; MIPS32-NEXT: mul $12, $3, $4 +; MIPS32-NEXT: mul $13, $2, $5 +; MIPS32-NEXT: mul $14, $1, $6 +; MIPS32-NEXT: multu $2, $4 +; MIPS32-NEXT: mfhi $15 +; MIPS32-NEXT: multu $1, $5 +; MIPS32-NEXT: mfhi $24 +; MIPS32-NEXT: addu $12, $12, $13 +; MIPS32-NEXT: sltu $13, $12, $13 +; MIPS32-NEXT: lui $25, 0 +; MIPS32-NEXT: ori $25, $25, 1 +; MIPS32-NEXT: and $13, $13, $25 +; MIPS32-NEXT: addu $12, $12, $14 +; MIPS32-NEXT: sltu $14, $12, $14 +; MIPS32-NEXT: lui $25, 0 +; MIPS32-NEXT: ori $25, $25, 1 +; MIPS32-NEXT: and $14, $14, $25 +; MIPS32-NEXT: addu $13, $13, $14 +; MIPS32-NEXT: addu $12, $12, $15 +; MIPS32-NEXT: sltu $14, $12, $15 +; MIPS32-NEXT: lui $15, 0 +; MIPS32-NEXT: ori $15, $15, 1 +; MIPS32-NEXT: and $14, $14, $15 +; MIPS32-NEXT: addu $13, $13, $14 +; MIPS32-NEXT: addu $12, $12, $24 +; MIPS32-NEXT: sltu $14, $12, $24 +; MIPS32-NEXT: lui $15, 0 +; MIPS32-NEXT: ori $15, $15, 1 +; MIPS32-NEXT: and $14, $14, $15 +; MIPS32-NEXT: addu $13, $13, $14 +; MIPS32-NEXT: addu $12, $12, $11 +; MIPS32-NEXT: sltu $11, $12, $11 +; MIPS32-NEXT: lui $14, 0 +; MIPS32-NEXT: ori $14, $14, 1 +; MIPS32-NEXT: and $11, $11, $14 +; MIPS32-NEXT: addu $11, $13, $11 +; MIPS32-NEXT: mul $8, $8, $4 +; MIPS32-NEXT: mul $13, $3, $5 +; MIPS32-NEXT: mul $14, $2, $6 
+; MIPS32-NEXT: mul $7, $1, $7 +; MIPS32-NEXT: multu $3, $4 +; MIPS32-NEXT: mfhi $3 +; MIPS32-NEXT: multu $2, $5 +; MIPS32-NEXT: mfhi $2 +; MIPS32-NEXT: multu $1, $6 +; MIPS32-NEXT: mfhi $1 +; MIPS32-NEXT: addu $4, $8, $13 +; MIPS32-NEXT: addu $4, $4, $14 +; MIPS32-NEXT: addu $4, $4, $7 +; MIPS32-NEXT: addu $3, $4, $3 +; MIPS32-NEXT: addu $2, $3, $2 +; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: addu $5, $1, $11 +; MIPS32-NEXT: move $2, $9 +; MIPS32-NEXT: move $3, $10 +; MIPS32-NEXT: move $4, $12 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %mul = mul i128 %b, %a + ret i128 %mul +} + declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) define void @umul_with_overflow(i32 %lhs, i32 %rhs, i32* %pmul, i1* %pcarry_flag) { ; MIPS32-LABEL: umul_with_overflow:
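A note on the carry sequences in the MIPS32 output above: the Mips legalizer lowers G_UADDO (the existing lowerFor({{s32, s1}}) rule), so each carry is recovered after the add with an unsigned compare; the sltu result is then masked with 1 (materialized by the lui/ori pair) because the i1 carry is zero-extended out of a full 32-bit register. A minimal sketch of that lowering, assuming 32-bit limbs and with addCarryOut as an illustrative name:

#include <cassert>
#include <cstdint>

// The carry of a 32-bit unsigned add can be recovered after the add:
// the sum wraps exactly when it is smaller than either addend. This is
// the addu + sltu + and sequence seen in mul_i128 above.
static uint32_t addCarryOut(uint32_t A, uint32_t B, uint32_t &Sum) {
  Sum = A + B;            // wraps modulo 2^32 on overflow
  return Sum < B ? 1 : 0; // 1 iff the add overflowed
}

int main() {
  uint32_t Sum;
  assert(addCarryOut(0xffffffffu, 1u, Sum) == 1 && Sum == 0);
  assert(addCarryOut(0x7fffffffu, 1u, Sum) == 0 && Sum == 0x80000000u);
  return 0;
}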