diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3939,33 +3939,32 @@ // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO) // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO) // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN) - // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh - // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 ) + // %4 = add iNh %1.0, %2.0 + // %5 = { iNh, i1 } @uadd.with.overflow.iNh(iNh %4, iNh %3.HIGH) // - // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 } + // %lo = %3.LO + // %hi = %5.0 + // %ovf = %0 || %1.1 || %2.1 || %5.1 SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); SDValue LHSHigh, LHSLow, RHSHigh, RHSLow; GetExpandedInteger(LHS, LHSLow, LHSHigh); GetExpandedInteger(RHS, RHSLow, RHSHigh); EVT HalfVT = LHSLow.getValueType(); EVT BitVT = N->getValueType(1); - SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT); - SDVTList VTFullAddO = DAG.getVTList(VT, BitVT); + SDVTList VTHalfWithO = DAG.getVTList(HalfVT, BitVT); SDValue HalfZero = DAG.getConstant(0, dl, HalfVT); SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT, DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE), DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE)); - SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow); + SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, LHSHigh, RHSLow); Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1)); - SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, - One.getValue(0)); - SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow); + SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfWithO, RHSHigh, LHSLow); Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1)); - SDValue TwoInHigh = 
DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, - Two.getValue(0)); + + SDValue HighSum = DAG.getNode(ISD::ADD, dl, HalfVT, One, Two); // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this @@ -3976,10 +3975,10 @@ SDValue Three = DAG.getNode(ISD::MUL, dl, VT, DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow), DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow)); - SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh); - SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four); - Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1)); - SplitInteger(Five, Lo, Hi); + SplitInteger(Three, Lo, Hi); + + Hi = DAG.getNode(ISD::UADDO, dl, VTHalfWithO, Hi, HighSum); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Hi.getValue(1)); ReplaceValueWith(SDValue(N, 1), Overflow); return; } diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -4,31 +4,27 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; AARCH-LABEL: muloti_test: ; AARCH: // %bb.0: // %start -; AARCH-NEXT: mul x8, x3, x0 -; AARCH-NEXT: umulh x9, x0, x2 -; AARCH-NEXT: madd x11, x1, x2, x8 -; AARCH-NEXT: add x8, x9, x11 -; AARCH-NEXT: cmp x8, x9 -; AARCH-NEXT: cset w9, lo -; AARCH-NEXT: cmp x11, #0 // =0 -; AARCH-NEXT: csel w9, wzr, w9, eq ; AARCH-NEXT: cmp x3, #0 // =0 -; AARCH-NEXT: umulh x10, x1, x2 -; AARCH-NEXT: cset w12, ne +; AARCH-NEXT: umulh x8, x1, x2 +; AARCH-NEXT: cset w10, ne ; AARCH-NEXT: cmp x1, #0 // =0 -; AARCH-NEXT: umulh x11, x3, x0 -; AARCH-NEXT: cset w13, ne -; AARCH-NEXT: cmp xzr, x10 -; AARCH-NEXT: and w10, w13, w12 -; AARCH-NEXT: cset w12, ne -; AARCH-NEXT: cmp xzr, x11 -; AARCH-NEXT: orr w10, w10, w12 +; AARCH-NEXT: mul x9, 
x3, x0 ; AARCH-NEXT: cset w11, ne +; AARCH-NEXT: cmp xzr, x8 +; AARCH-NEXT: umulh x8, x3, x0 +; AARCH-NEXT: madd x9, x1, x2, x9 +; AARCH-NEXT: and w10, w11, w10 +; AARCH-NEXT: cset w11, ne +; AARCH-NEXT: cmp xzr, x8 +; AARCH-NEXT: umulh x8, x0, x2 ; AARCH-NEXT: orr w10, w10, w11 -; AARCH-NEXT: orr w9, w10, w9 +; AARCH-NEXT: cset w11, ne +; AARCH-NEXT: adds x1, x8, x9 +; AARCH-NEXT: orr w8, w10, w11 +; AARCH-NEXT: cset w9, hs +; AARCH-NEXT: orr w8, w8, w9 ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-DAG: mov x1, x8 -; AARCH-DAG: mov w2, w9 +; AARCH-NEXT: mov w2, w8 ; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -325,59 +325,53 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2) nounwind { ; CHECK-LABEL: umulo_v2i128: ; CHECK: // %bb.0: -; CHECK-NEXT: mul x9, x7, x2 -; CHECK-NEXT: umulh x10, x2, x6 -; CHECK-NEXT: madd x9, x3, x6, x9 -; CHECK-NEXT: add x15, x10, x9 -; CHECK-NEXT: cmp x15, x10 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x9, #0 // =0 -; CHECK-NEXT: csel w10, wzr, w10, eq ; CHECK-NEXT: cmp x7, #0 // =0 -; CHECK-NEXT: umulh x11, x3, x6 -; CHECK-NEXT: mul x13, x5, x0 -; CHECK-NEXT: cset w17, ne +; CHECK-NEXT: umulh x8, x3, x6 +; CHECK-NEXT: cset w13, ne ; CHECK-NEXT: cmp x3, #0 // =0 -; CHECK-NEXT: umulh x12, x7, x2 -; CHECK-NEXT: umulh x9, x0, x4 -; CHECK-NEXT: madd x13, x1, x4, x13 -; CHECK-NEXT: cset w18, ne -; CHECK-NEXT: cmp xzr, x11 +; CHECK-NEXT: umulh x9, x7, x2 +; CHECK-NEXT: mul x10, x7, x2 +; CHECK-NEXT: cset w14, ne +; CHECK-NEXT: cmp xzr, x8 ; CHECK-NEXT: ldr x8, [sp] -; CHECK-NEXT: add x11, x9, x13 -; CHECK-NEXT: and w17, w18, w17 -; CHECK-NEXT: cset w18, ne -; CHECK-NEXT: cmp xzr, x12 -; CHECK-NEXT: orr w12, w17, w18 -; CHECK-NEXT: cset w17, ne -; CHECK-NEXT: cmp x11, x9 -; 
CHECK-NEXT: orr w9, w12, w17 -; CHECK-NEXT: cset w12, lo -; CHECK-NEXT: cmp x13, #0 // =0 -; CHECK-NEXT: mul x14, x2, x6 -; CHECK-NEXT: csel w12, wzr, w12, eq -; CHECK-NEXT: cmp x5, #0 // =0 -; CHECK-NEXT: stp x14, x15, [x8, #16] -; CHECK-NEXT: umulh x14, x1, x4 -; CHECK-NEXT: cset w13, ne -; CHECK-NEXT: cmp x1, #0 // =0 -; CHECK-NEXT: umulh x16, x5, x0 -; CHECK-NEXT: cset w17, ne -; CHECK-NEXT: cmp xzr, x14 -; CHECK-NEXT: and w13, w17, w13 +; CHECK-NEXT: umulh x11, x2, x6 +; CHECK-NEXT: madd x10, x3, x6, x10 +; CHECK-NEXT: and w13, w14, w13 ; CHECK-NEXT: cset w14, ne -; CHECK-NEXT: cmp xzr, x16 +; CHECK-NEXT: cmp xzr, x9 ; CHECK-NEXT: orr w13, w13, w14 ; CHECK-NEXT: cset w14, ne +; CHECK-NEXT: adds x10, x11, x10 +; CHECK-NEXT: mul x12, x2, x6 ; CHECK-NEXT: orr w13, w13, w14 -; CHECK-NEXT: orr w12, w13, w12 +; CHECK-NEXT: cset w14, hs +; CHECK-NEXT: cmp x5, #0 // =0 +; CHECK-NEXT: umulh x17, x1, x4 +; CHECK-NEXT: stp x12, x10, [x8, #16] +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: cmp x1, #0 // =0 +; CHECK-NEXT: umulh x9, x5, x0 +; CHECK-NEXT: mul x11, x5, x0 +; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: cmp xzr, x17 +; CHECK-NEXT: umulh x15, x0, x4 +; CHECK-NEXT: madd x11, x1, x4, x11 +; CHECK-NEXT: and w10, w12, w10 +; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: cmp xzr, x9 +; CHECK-NEXT: orr w9, w10, w12 +; CHECK-NEXT: cset w10, ne +; CHECK-NEXT: adds x11, x15, x11 +; CHECK-NEXT: orr w9, w9, w10 +; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: orr w9, w9, w10 -; CHECK-NEXT: fmov s0, w12 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: orr w10, w13, w14 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: mul x15, x0, x4 +; CHECK-NEXT: mul x16, x0, x4 ; CHECK-NEXT: sshr v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x15, x11, [x8] +; CHECK-NEXT: stp x16, x11, [x8] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git 
a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll --- a/llvm/test/CodeGen/PowerPC/pr45448.ll +++ b/llvm/test/CodeGen/PowerPC/pr45448.ll @@ -23,17 +23,14 @@ ; CHECK-NEXT: cmpdi r3, 0 ; CHECK-NEXT: sradi r4, r3, 63 ; CHECK-NEXT: rldic r5, r5, 4, 32 -; CHECK-NEXT: crnot 4*cr5+gt, eq +; CHECK-NEXT: crnot 4*cr5+lt, eq ; CHECK-NEXT: mulhdu r3, r3, r5 ; CHECK-NEXT: maddld r6, r4, r5, r3 -; CHECK-NEXT: cmpld r6, r3 -; CHECK-NEXT: mulld r3, r4, r5 -; CHECK-NEXT: cmpldi cr1, r3, 0 -; CHECK-NEXT: crandc 4*cr5+lt, lt, 4*cr1+eq +; CHECK-NEXT: cmpld cr1, r6, r3 ; CHECK-NEXT: mulhdu. r3, r4, r5 -; CHECK-NEXT: bc 4, 4*cr5+gt, .LBB0_10 +; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10 ; CHECK-NEXT: # %bb.8: # %L670 -; CHECK-NEXT: crorc 4*cr5+lt, 4*cr5+lt, eq +; CHECK-NEXT: crorc 4*cr5+lt, 4*cr1+lt, eq ; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB0_10 ; CHECK-NEXT: # %bb.9: # %L917 ; CHECK-NEXT: .LBB0_10: # %L994 diff --git a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -5,125 +5,118 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; PPC64-LABEL: muloti_test: ; PPC64: # %bb.0: # %start -; PPC64-NEXT: mulhdu. 8, 3, 6 -; PPC64-NEXT: mcrf 1, 0 -; PPC64-NEXT: mulld 8, 5, 4 -; PPC64-NEXT: cmpdi 3, 0 -; PPC64-NEXT: mulld 3, 3, 6 -; PPC64-NEXT: cmpdi 5, 5, 0 -; PPC64-NEXT: add 3, 3, 8 -; PPC64-NEXT: crnor 20, 22, 2 -; PPC64-NEXT: mulhdu 8, 4, 6 -; PPC64-NEXT: cmpldi 3, 0 -; PPC64-NEXT: add 3, 8, 3 -; PPC64-NEXT: cmpld 6, 3, 8 -; PPC64-NEXT: crandc 21, 24, 2 -; PPC64-NEXT: crorc 20, 20, 6 -; PPC64-NEXT: mulhdu. 
5, 5, 4 -; PPC64-NEXT: crorc 20, 20, 2 -; PPC64-NEXT: li 7, 1 -; PPC64-NEXT: crnor 20, 20, 21 +; PPC64-NEXT: addic 8, 5, -1 +; PPC64-NEXT: mulhdu 9, 5, 4 +; PPC64-NEXT: mulld 10, 5, 4 +; PPC64-NEXT: subfe 5, 8, 5 +; PPC64-NEXT: mulld 8, 3, 6 +; PPC64-NEXT: add 8, 8, 10 +; PPC64-NEXT: addic 10, 3, -1 +; PPC64-NEXT: mulhdu 7, 3, 6 +; PPC64-NEXT: subfe 3, 10, 3 +; PPC64-NEXT: and 5, 3, 5 +; PPC64-NEXT: addic 3, 7, -1 +; PPC64-NEXT: subfe 7, 3, 7 +; PPC64-NEXT: or 5, 5, 7 +; PPC64-NEXT: mulhdu 10, 4, 6 +; PPC64-NEXT: addic 7, 9, -1 +; PPC64-NEXT: add 3, 10, 8 +; PPC64-NEXT: subfe 7, 7, 9 +; PPC64-NEXT: or 5, 5, 7 +; PPC64-NEXT: subc 7, 3, 10 +; PPC64-NEXT: subfe 7, 3, 3 +; PPC64-NEXT: neg 7, 7 +; PPC64-NEXT: or 5, 5, 7 ; PPC64-NEXT: mulld 4, 4, 6 -; PPC64-NEXT: bc 12, 20, .LBB0_2 -; PPC64-NEXT: # %bb.1: # %start -; PPC64-NEXT: ori 5, 7, 0 -; PPC64-NEXT: blr -; PPC64-NEXT: .LBB0_2: # %start -; PPC64-NEXT: li 5, 0 ; PPC64-NEXT: blr ; ; PPC32-LABEL: muloti_test: ; PPC32: # %bb.0: # %start ; PPC32-NEXT: mflr 0 ; PPC32-NEXT: stw 0, 4(1) -; PPC32-NEXT: stwu 1, -80(1) -; PPC32-NEXT: stw 26, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: stwu 1, -64(1) +; PPC32-NEXT: stw 24, 32(1) # 4-byte Folded Spill ; PPC32-NEXT: mfcr 12 -; PPC32-NEXT: stw 27, 60(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 27, 4 -; PPC32-NEXT: stw 29, 68(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 29, 7 -; PPC32-NEXT: stw 30, 72(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 30, 8 -; PPC32-NEXT: mr 26, 3 +; PPC32-NEXT: stw 26, 40(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 26, 7 +; PPC32-NEXT: stw 28, 48(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 28, 4 +; PPC32-NEXT: stw 29, 52(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 29, 8 +; PPC32-NEXT: mr 24, 3 ; PPC32-NEXT: li 3, 0 ; PPC32-NEXT: li 4, 0 ; PPC32-NEXT: li 7, 0 ; PPC32-NEXT: li 8, 0 -; PPC32-NEXT: stw 20, 32(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 21, 36(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 22, 40(1) # 4-byte Folded Spill -; PPC32-NEXT: stw 23, 44(1) # 
4-byte Folded Spill -; PPC32-NEXT: mr 23, 6 -; PPC32-NEXT: stw 24, 48(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 24, 5 -; PPC32-NEXT: stw 25, 52(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 25, 10 -; PPC32-NEXT: stw 28, 64(1) # 4-byte Folded Spill -; PPC32-NEXT: mr 28, 9 -; PPC32-NEXT: stw 12, 28(1) +; PPC32-NEXT: stw 23, 28(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 23, 5 +; PPC32-NEXT: stw 25, 36(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 25, 9 +; PPC32-NEXT: stw 27, 44(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 27, 6 +; PPC32-NEXT: stw 30, 56(1) # 4-byte Folded Spill +; PPC32-NEXT: mr 30, 10 +; PPC32-NEXT: stw 12, 24(1) ; PPC32-NEXT: bl __multi3 -; PPC32-NEXT: mr 7, 4 -; PPC32-NEXT: mullw 4, 24, 30 -; PPC32-NEXT: cmpwi 5, 24, 0 -; PPC32-NEXT: cmpwi 6, 26, 0 -; PPC32-NEXT: cmpwi 7, 28, 0 -; PPC32-NEXT: crnor 9, 30, 26 -; PPC32-NEXT: mullw 8, 29, 23 -; PPC32-NEXT: add 21, 8, 4 -; PPC32-NEXT: mullw 11, 28, 27 -; PPC32-NEXT: mullw 12, 26, 25 -; PPC32-NEXT: add 11, 12, 11 -; PPC32-NEXT: cmplwi 7, 11, 0 -; PPC32-NEXT: mulhwu 9, 30, 23 -; PPC32-NEXT: add 12, 9, 21 -; PPC32-NEXT: cmplw 6, 12, 9 -; PPC32-NEXT: mulhwu 10, 27, 25 -; PPC32-NEXT: mullw 0, 30, 23 -; PPC32-NEXT: mullw 22, 27, 25 -; PPC32-NEXT: addc 4, 22, 0 -; PPC32-NEXT: add 0, 10, 11 -; PPC32-NEXT: adde 8, 0, 12 -; PPC32-NEXT: addc 4, 7, 4 -; PPC32-NEXT: adde 8, 3, 8 -; PPC32-NEXT: xor 22, 4, 7 -; PPC32-NEXT: xor 20, 8, 3 -; PPC32-NEXT: or. 22, 22, 20 -; PPC32-NEXT: mcrf 1, 0 -; PPC32-NEXT: cmpwi 29, 0 -; PPC32-NEXT: crnor 8, 22, 2 -; PPC32-NEXT: mulhwu. 23, 29, 23 +; PPC32-NEXT: mulhwu. 9, 26, 27 +; PPC32-NEXT: mfcr 9 # cr0 +; PPC32-NEXT: cmpwi 2, 26, 0 +; PPC32-NEXT: stw 9, 20(1) +; PPC32-NEXT: cmpwi 3, 23, 0 +; PPC32-NEXT: crnor 12, 14, 10 +; PPC32-NEXT: cmpwi 4, 24, 0 +; PPC32-NEXT: mulhwu. 
9, 23, 29 ; PPC32-NEXT: mcrf 5, 0 -; PPC32-NEXT: cmplwi 21, 0 -; PPC32-NEXT: crandc 10, 24, 2 -; PPC32-NEXT: cmplw 3, 0, 10 -; PPC32-NEXT: crandc 11, 12, 30 +; PPC32-NEXT: cmpwi 1, 25, 0 +; PPC32-NEXT: crnor 4, 6, 18 ; PPC32-NEXT: mulhwu. 9, 24, 30 ; PPC32-NEXT: mcrf 6, 0 -; PPC32-NEXT: cmplw 4, 7 -; PPC32-NEXT: cmplw 7, 8, 3 -; PPC32-NEXT: crand 12, 30, 0 -; PPC32-NEXT: crandc 13, 28, 30 -; PPC32-NEXT: mulhwu. 3, 26, 25 +; PPC32-NEXT: mulhwu. 0, 25, 28 ; PPC32-NEXT: mcrf 7, 0 -; PPC32-NEXT: cror 0, 12, 13 -; PPC32-NEXT: crandc 12, 0, 6 -; PPC32-NEXT: crorc 20, 8, 22 -; PPC32-NEXT: crorc 20, 20, 26 -; PPC32-NEXT: mulhwu. 3, 28, 27 -; PPC32-NEXT: mcrf 1, 0 -; PPC32-NEXT: crorc 25, 9, 30 -; PPC32-NEXT: or. 3, 27, 26 -; PPC32-NEXT: cror 24, 20, 10 -; PPC32-NEXT: mcrf 5, 0 -; PPC32-NEXT: crorc 25, 25, 6 -; PPC32-NEXT: or. 3, 30, 29 -; PPC32-NEXT: cror 25, 25, 11 -; PPC32-NEXT: crnor 20, 2, 22 -; PPC32-NEXT: lwz 12, 28(1) -; PPC32-NEXT: cror 20, 20, 25 -; PPC32-NEXT: cror 20, 20, 24 -; PPC32-NEXT: crnor 20, 20, 12 +; PPC32-NEXT: or. 0, 28, 24 +; PPC32-NEXT: mcrf 2, 0 +; PPC32-NEXT: or. 
0, 29, 26 +; PPC32-NEXT: crnor 5, 2, 10 +; PPC32-NEXT: mullw 10, 26, 27 +; PPC32-NEXT: lwz 26, 20(1) +; PPC32-NEXT: mullw 9, 23, 29 +; PPC32-NEXT: add 9, 10, 9 +; PPC32-NEXT: mtcrf 128, 26 # cr0 +; PPC32-NEXT: crorc 6, 12, 2 +; PPC32-NEXT: crorc 20, 6, 22 +; PPC32-NEXT: mulhwu 7, 29, 27 +; PPC32-NEXT: add 9, 7, 9 +; PPC32-NEXT: cmplw 9, 7 +; PPC32-NEXT: crorc 21, 4, 26 +; PPC32-NEXT: cror 20, 20, 0 +; PPC32-NEXT: crorc 21, 21, 30 +; PPC32-NEXT: mullw 11, 25, 28 +; PPC32-NEXT: mullw 12, 24, 30 +; PPC32-NEXT: add 10, 12, 11 +; PPC32-NEXT: lwz 12, 24(1) +; PPC32-NEXT: lwz 26, 40(1) # 4-byte Folded Reload +; PPC32-NEXT: mulhwu 8, 28, 30 +; PPC32-NEXT: add 10, 8, 10 +; PPC32-NEXT: cmplw 10, 8 +; PPC32-NEXT: cror 21, 21, 0 +; PPC32-NEXT: cror 21, 5, 21 +; PPC32-NEXT: cror 20, 21, 20 +; PPC32-NEXT: mullw 0, 29, 27 +; PPC32-NEXT: mtcrf 32, 12 # cr2 +; PPC32-NEXT: mtcrf 16, 12 # cr3 +; PPC32-NEXT: mtcrf 8, 12 # cr4 +; PPC32-NEXT: lwz 29, 52(1) # 4-byte Folded Reload +; PPC32-NEXT: mullw 7, 28, 30 +; PPC32-NEXT: addc 7, 7, 0 +; PPC32-NEXT: adde 11, 10, 9 +; PPC32-NEXT: addc 9, 4, 7 +; PPC32-NEXT: adde 8, 3, 11 +; PPC32-NEXT: cmplw 6, 9, 4 +; PPC32-NEXT: cmplw 8, 3 +; PPC32-NEXT: crand 22, 2, 24 +; PPC32-NEXT: crandc 23, 0, 2 +; PPC32-NEXT: cror 22, 22, 23 +; PPC32-NEXT: crnor 20, 20, 22 ; PPC32-NEXT: li 3, 1 ; PPC32-NEXT: bc 12, 20, .LBB0_2 ; PPC32-NEXT: # %bb.1: # %start @@ -133,21 +126,15 @@ ; PPC32-NEXT: li 7, 0 ; PPC32-NEXT: .LBB0_3: # %start ; PPC32-NEXT: mr 3, 8 -; PPC32-NEXT: mtcrf 32, 12 # cr2 -; PPC32-NEXT: mtcrf 16, 12 # cr3 -; PPC32-NEXT: lwz 30, 72(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 29, 68(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 28, 64(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 27, 60(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 26, 56(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 25, 52(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 24, 48(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 23, 44(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 22, 40(1) # 
4-byte Folded Reload -; PPC32-NEXT: lwz 21, 36(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 20, 32(1) # 4-byte Folded Reload -; PPC32-NEXT: lwz 0, 84(1) -; PPC32-NEXT: addi 1, 1, 80 +; PPC32-NEXT: mr 4, 9 +; PPC32-NEXT: lwz 30, 56(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 28, 48(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 27, 44(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 25, 36(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 24, 32(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 23, 28(1) # 4-byte Folded Reload +; PPC32-NEXT: lwz 0, 68(1) +; PPC32-NEXT: addi 1, 1, 64 ; PPC32-NEXT: mtlr 0 ; PPC32-NEXT: blr start: diff --git a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll @@ -6,40 +6,37 @@ ; SPARC-LABEL: muloti_test: ; SPARC: .cfi_startproc ; SPARC-NEXT: ! %bb.0: ! %start -; SPARC-NEXT: save %sp, -128, %sp +; SPARC-NEXT: save %sp, -120, %sp ; SPARC-NEXT: .cfi_def_cfa_register %fp ; SPARC-NEXT: .cfi_window_save ; SPARC-NEXT: .cfi_register %o7, %i7 -; SPARC-NEXT: ld [%fp+92], %l3 +; SPARC-NEXT: ld [%fp+92], %l4 ; SPARC-NEXT: ld [%fp+96], %g2 ; SPARC-NEXT: umul %i2, %i5, %g3 ; SPARC-NEXT: rd %y, %g4 -; SPARC-NEXT: st %g4, [%fp+-20] ! 4-byte Folded Spill +; SPARC-NEXT: st %g4, [%fp+-12] ! 4-byte Folded Spill ; SPARC-NEXT: umul %i4, %i3, %g4 ; SPARC-NEXT: rd %y, %l0 -; SPARC-NEXT: st %l0, [%fp+-24] ! 4-byte Folded Spill +; SPARC-NEXT: st %l0, [%fp+-16] ! 4-byte Folded Spill ; SPARC-NEXT: st %g2, [%sp+96] -; SPARC-NEXT: st %i5, [%fp+-8] ! 4-byte Folded Spill ; SPARC-NEXT: umul %i5, %i3, %l0 -; SPARC-NEXT: rd %y, %l5 -; SPARC-NEXT: st %l3, [%sp+92] -; SPARC-NEXT: umul %l3, %i1, %l4 -; SPARC-NEXT: rd %y, %i5 -; SPARC-NEXT: st %i5, [%fp+-12] ! 
4-byte Folded Spill -; SPARC-NEXT: add %g4, %g3, %l2 -; SPARC-NEXT: mov %i0, %i5 -; SPARC-NEXT: umul %i0, %g2, %g3 -; SPARC-NEXT: rd %y, %i0 -; SPARC-NEXT: st %i0, [%fp+-16] ! 4-byte Folded Spill -; SPARC-NEXT: add %l5, %l2, %l1 -; SPARC-NEXT: st %i1, [%fp+-4] ! 4-byte Folded Spill -; SPARC-NEXT: umul %i1, %g2, %g2 ; SPARC-NEXT: rd %y, %l6 -; SPARC-NEXT: add %g3, %l4, %i0 -; SPARC-NEXT: add %l6, %i0, %l7 -; SPARC-NEXT: addcc %g2, %l0, %l4 +; SPARC-NEXT: st %l4, [%sp+92] +; SPARC-NEXT: umul %l4, %i1, %l2 +; SPARC-NEXT: rd %y, %l1 +; SPARC-NEXT: st %l1, [%fp+-4] ! 4-byte Folded Spill +; SPARC-NEXT: add %g4, %g3, %g3 +; SPARC-NEXT: umul %i0, %g2, %g4 +; SPARC-NEXT: rd %y, %l1 +; SPARC-NEXT: st %l1, [%fp+-8] ! 4-byte Folded Spill +; SPARC-NEXT: add %l6, %g3, %l3 +; SPARC-NEXT: umul %i1, %g2, %g2 +; SPARC-NEXT: rd %y, %l1 +; SPARC-NEXT: add %g4, %l2, %g3 +; SPARC-NEXT: add %l1, %g3, %l2 +; SPARC-NEXT: addcc %g2, %l0, %l7 ; SPARC-NEXT: mov %g0, %l0 -; SPARC-NEXT: addxcc %l7, %l1, %i1 +; SPARC-NEXT: addxcc %l2, %l3, %l5 ; SPARC-NEXT: mov %l0, %o0 ; SPARC-NEXT: mov %l0, %o1 ; SPARC-NEXT: mov %i2, %o2 @@ -47,139 +44,115 @@ ; SPARC-NEXT: mov %l0, %o4 ; SPARC-NEXT: call __multi3 ; SPARC-NEXT: mov %l0, %o5 -; SPARC-NEXT: addcc %o1, %l4, %i3 -; SPARC-NEXT: addxcc %o0, %i1, %g2 +; SPARC-NEXT: addcc %o1, %l7, %i3 +; SPARC-NEXT: addxcc %o0, %l5, %g2 ; SPARC-NEXT: mov 1, %g3 ; SPARC-NEXT: cmp %g2, %o0 ; SPARC-NEXT: bcs .LBB0_2 -; SPARC-NEXT: mov %g3, %g4 +; SPARC-NEXT: mov %g3, %o4 ; SPARC-NEXT: ! %bb.1: ! %start -; SPARC-NEXT: mov %l0, %g4 +; SPARC-NEXT: mov %l0, %o4 ; SPARC-NEXT: .LBB0_2: ! %start ; SPARC-NEXT: cmp %i3, %o1 ; SPARC-NEXT: bcs .LBB0_4 -; SPARC-NEXT: mov %g3, %o4 +; SPARC-NEXT: mov %g3, %g4 ; SPARC-NEXT: ! %bb.3: ! %start -; SPARC-NEXT: mov %l0, %o4 +; SPARC-NEXT: mov %l0, %g4 ; SPARC-NEXT: .LBB0_4: ! %start ; SPARC-NEXT: cmp %g2, %o0 ; SPARC-NEXT: be .LBB0_6 ; SPARC-NEXT: nop ; SPARC-NEXT: ! %bb.5: ! 
%start -; SPARC-NEXT: mov %g4, %o4 +; SPARC-NEXT: mov %o4, %g4 ; SPARC-NEXT: .LBB0_6: ! %start -; SPARC-NEXT: xor %g2, %o0, %i1 -; SPARC-NEXT: xor %i3, %o1, %g4 -; SPARC-NEXT: or %g4, %i1, %i1 -; SPARC-NEXT: cmp %i1, 0 -; SPARC-NEXT: be .LBB0_8 -; SPARC-NEXT: mov %l0, %g4 +; SPARC-NEXT: cmp %i2, 0 +; SPARC-NEXT: bne .LBB0_8 +; SPARC-NEXT: mov %g3, %i2 ; SPARC-NEXT: ! %bb.7: ! %start -; SPARC-NEXT: mov %o4, %g4 +; SPARC-NEXT: mov %l0, %i2 ; SPARC-NEXT: .LBB0_8: ! %start -; SPARC-NEXT: cmp %l1, %l5 -; SPARC-NEXT: mov %g3, %l1 -; SPARC-NEXT: bcs .LBB0_10 -; SPARC-NEXT: mov %i5, %i1 +; SPARC-NEXT: cmp %i4, 0 +; SPARC-NEXT: bne .LBB0_10 +; SPARC-NEXT: mov %g3, %o1 ; SPARC-NEXT: ! %bb.9: ! %start -; SPARC-NEXT: mov %l0, %l1 +; SPARC-NEXT: mov %l0, %o1 ; SPARC-NEXT: .LBB0_10: ! %start -; SPARC-NEXT: cmp %l2, 0 -; SPARC-NEXT: be .LBB0_12 -; SPARC-NEXT: mov %l0, %o0 +; SPARC-NEXT: ld [%fp+-16], %l5 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %l5, 0 +; SPARC-NEXT: bne .LBB0_12 +; SPARC-NEXT: mov %g3, %o0 ; SPARC-NEXT: ! %bb.11: ! %start -; SPARC-NEXT: mov %l1, %o0 +; SPARC-NEXT: mov %l0, %o0 ; SPARC-NEXT: .LBB0_12: ! %start -; SPARC-NEXT: cmp %i2, 0 +; SPARC-NEXT: ld [%fp+-12], %l5 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %l5, 0 ; SPARC-NEXT: bne .LBB0_14 -; SPARC-NEXT: mov %g3, %i2 +; SPARC-NEXT: mov %g3, %l5 ; SPARC-NEXT: ! %bb.13: ! %start -; SPARC-NEXT: mov %l0, %i2 +; SPARC-NEXT: mov %l0, %l5 ; SPARC-NEXT: .LBB0_14: ! %start -; SPARC-NEXT: cmp %i4, 0 -; SPARC-NEXT: bne .LBB0_16 -; SPARC-NEXT: mov %g3, %o1 +; SPARC-NEXT: cmp %l3, %l6 +; SPARC-NEXT: bcs .LBB0_16 +; SPARC-NEXT: mov %g3, %l3 ; SPARC-NEXT: ! %bb.15: ! %start -; SPARC-NEXT: mov %l0, %o1 +; SPARC-NEXT: mov %l0, %l3 ; SPARC-NEXT: .LBB0_16: ! %start -; SPARC-NEXT: ld [%fp+-24], %i5 ! 4-byte Folded Reload -; SPARC-NEXT: cmp %i5, 0 +; SPARC-NEXT: cmp %l4, 0 ; SPARC-NEXT: bne .LBB0_18 -; SPARC-NEXT: mov %g3, %l5 +; SPARC-NEXT: mov %g3, %l4 ; SPARC-NEXT: ! %bb.17: ! 
%start -; SPARC-NEXT: mov %l0, %l5 +; SPARC-NEXT: mov %l0, %l4 ; SPARC-NEXT: .LBB0_18: ! %start -; SPARC-NEXT: ld [%fp+-20], %i5 ! 4-byte Folded Reload -; SPARC-NEXT: cmp %i5, 0 +; SPARC-NEXT: cmp %i0, 0 ; SPARC-NEXT: bne .LBB0_20 -; SPARC-NEXT: mov %g3, %l1 +; SPARC-NEXT: mov %g3, %l7 ; SPARC-NEXT: ! %bb.19: ! %start -; SPARC-NEXT: mov %l0, %l1 +; SPARC-NEXT: mov %l0, %l7 ; SPARC-NEXT: .LBB0_20: ! %start -; SPARC-NEXT: cmp %l7, %l6 -; SPARC-NEXT: bcs .LBB0_22 +; SPARC-NEXT: ld [%fp+-8], %l6 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %l6, 0 +; SPARC-NEXT: bne .LBB0_22 ; SPARC-NEXT: mov %g3, %l6 ; SPARC-NEXT: ! %bb.21: ! %start ; SPARC-NEXT: mov %l0, %l6 ; SPARC-NEXT: .LBB0_22: ! %start -; SPARC-NEXT: cmp %i0, 0 -; SPARC-NEXT: be .LBB0_24 -; SPARC-NEXT: mov %l0, %l2 +; SPARC-NEXT: and %o1, %i2, %i2 +; SPARC-NEXT: ld [%fp+-4], %o1 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %o1, 0 +; SPARC-NEXT: and %l7, %l4, %o1 +; SPARC-NEXT: bne .LBB0_24 +; SPARC-NEXT: mov %g3, %l4 ; SPARC-NEXT: ! %bb.23: ! %start -; SPARC-NEXT: mov %l6, %l2 +; SPARC-NEXT: mov %l0, %l4 ; SPARC-NEXT: .LBB0_24: ! %start -; SPARC-NEXT: cmp %l3, 0 -; SPARC-NEXT: bne .LBB0_26 -; SPARC-NEXT: mov %g3, %l3 +; SPARC-NEXT: or %i2, %o0, %l7 +; SPARC-NEXT: cmp %l2, %l1 +; SPARC-NEXT: or %o1, %l6, %l2 +; SPARC-NEXT: bcs .LBB0_26 +; SPARC-NEXT: mov %g3, %i2 ; SPARC-NEXT: ! %bb.25: ! %start -; SPARC-NEXT: mov %l0, %l3 +; SPARC-NEXT: mov %l0, %i2 ; SPARC-NEXT: .LBB0_26: ! %start -; SPARC-NEXT: cmp %i1, 0 +; SPARC-NEXT: or %l7, %l5, %l1 +; SPARC-NEXT: or %i5, %i4, %i4 +; SPARC-NEXT: cmp %i4, 0 +; SPARC-NEXT: or %l2, %l4, %l2 ; SPARC-NEXT: bne .LBB0_28 -; SPARC-NEXT: mov %g3, %l4 +; SPARC-NEXT: mov %g3, %i4 ; SPARC-NEXT: ! %bb.27: ! %start -; SPARC-NEXT: mov %l0, %l4 +; SPARC-NEXT: mov %l0, %i4 ; SPARC-NEXT: .LBB0_28: ! %start -; SPARC-NEXT: and %o1, %i2, %i2 -; SPARC-NEXT: ld [%fp+-16], %i0 ! 
4-byte Folded Reload +; SPARC-NEXT: or %l1, %l3, %i5 +; SPARC-NEXT: or %i1, %i0, %i0 ; SPARC-NEXT: cmp %i0, 0 -; SPARC-NEXT: and %l4, %l3, %l4 ; SPARC-NEXT: bne .LBB0_30 -; SPARC-NEXT: mov %g3, %l6 +; SPARC-NEXT: or %l2, %i2, %i0 ; SPARC-NEXT: ! %bb.29: ! %start -; SPARC-NEXT: mov %l0, %l6 -; SPARC-NEXT: .LBB0_30: ! %start -; SPARC-NEXT: or %i2, %l5, %l3 -; SPARC-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload -; SPARC-NEXT: cmp %i0, 0 -; SPARC-NEXT: or %l4, %l6, %i2 -; SPARC-NEXT: bne .LBB0_32 -; SPARC-NEXT: mov %g3, %l4 -; SPARC-NEXT: ! %bb.31: ! %start -; SPARC-NEXT: mov %l0, %l4 -; SPARC-NEXT: .LBB0_32: ! %start -; SPARC-NEXT: or %l3, %l1, %l1 -; SPARC-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload -; SPARC-NEXT: or %i0, %i4, %i0 -; SPARC-NEXT: cmp %i0, 0 -; SPARC-NEXT: or %i2, %l4, %i5 -; SPARC-NEXT: bne .LBB0_34 -; SPARC-NEXT: mov %g3, %i2 -; SPARC-NEXT: ! %bb.33: ! %start -; SPARC-NEXT: mov %l0, %i2 -; SPARC-NEXT: .LBB0_34: ! %start -; SPARC-NEXT: or %l1, %o0, %i4 -; SPARC-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload -; SPARC-NEXT: or %i0, %i1, %i0 -; SPARC-NEXT: cmp %i0, 0 -; SPARC-NEXT: bne .LBB0_36 -; SPARC-NEXT: or %i5, %l2, %i0 -; SPARC-NEXT: ! %bb.35: ! %start ; SPARC-NEXT: mov %l0, %g3 -; SPARC-NEXT: .LBB0_36: ! %start -; SPARC-NEXT: and %g3, %i2, %i1 +; SPARC-NEXT: .LBB0_30: ! 
%start +; SPARC-NEXT: and %g3, %i4, %i1 ; SPARC-NEXT: or %i1, %i0, %i0 -; SPARC-NEXT: or %i0, %i4, %i0 +; SPARC-NEXT: or %i0, %i5, %i0 ; SPARC-NEXT: or %i0, %g4, %i0 ; SPARC-NEXT: and %i0, 1, %i4 ; SPARC-NEXT: mov %g2, %i0 @@ -218,25 +191,23 @@ ; SPARC64-NEXT: add %o0, %i5, %i1 ; SPARC64-NEXT: mov %g0, %i3 ; SPARC64-NEXT: cmp %i1, %o0 -; SPARC64-NEXT: mov %i3, %g2 -; SPARC64-NEXT: movcs %xcc, 1, %g2 -; SPARC64-NEXT: cmp %i5, 0 -; SPARC64-NEXT: move %xcc, 0, %g2 +; SPARC64-NEXT: mov %i3, %i5 +; SPARC64-NEXT: movcs %xcc, 1, %i5 ; SPARC64-NEXT: cmp %i4, 0 ; SPARC64-NEXT: mov %i3, %i4 ; SPARC64-NEXT: movne %xcc, 1, %i4 ; SPARC64-NEXT: cmp %l0, 0 -; SPARC64-NEXT: mov %i3, %i5 -; SPARC64-NEXT: movne %xcc, 1, %i5 +; SPARC64-NEXT: mov %i3, %g2 +; SPARC64-NEXT: movne %xcc, 1, %g2 ; SPARC64-NEXT: cmp %i2, 0 ; SPARC64-NEXT: mov %i3, %i2 ; SPARC64-NEXT: movne %xcc, 1, %i2 ; SPARC64-NEXT: cmp %i0, 0 ; SPARC64-NEXT: movne %xcc, 1, %i3 ; SPARC64-NEXT: and %i3, %i2, %i0 -; SPARC64-NEXT: or %i0, %i5, %i0 -; SPARC64-NEXT: or %i0, %i4, %i0 ; SPARC64-NEXT: or %i0, %g2, %i0 +; SPARC64-NEXT: or %i0, %i4, %i0 +; SPARC64-NEXT: or %i0, %i5, %i0 ; SPARC64-NEXT: srl %i0, 0, %i2 ; SPARC64-NEXT: mov %i1, %i0 ; SPARC64-NEXT: ret