Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2705,25 +2705,56 @@ EVT VT = N->getValueType(0); SDLoc dl(N); - // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { + // This section expands the operation into the following sequence of + // instructions. `iNh` here refers to a type which has half the bit width of + // the type the original operation operated on. + // + // %0 = %LHS.HI != 0 && %RHS.HI != 0 + // %1 = { iNh, i1 } @umul.with.overflow.iNh(iNh %LHS.HI, iNh %RHS.LO) + // %2 = { iNh, i1 } @umul.with.overflow.iNh(iNh %RHS.HI, iNh %LHS.LO) + // %3 = mul nuw iN (%LHS.LOW as iN), (%RHS.LOW as iN) + // %4 = add iN (%1.0 as iN) << Nh, (%2.0 as iN) << Nh + // %5 = { iN, i1 } @uadd.with.overflow.iN( %4, %3 ) + // + // %res = { %5.0, %0 || %1.1 || %2.1 || %5.1 } SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - - SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); - SplitInteger(MUL, Lo, Hi); - - // A divide for UMULO will be faster than a function call. Select to - // make sure we aren't using 0. 
- SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), - RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); - SDValue NotZero = DAG.getSelect(dl, VT, isZero, - DAG.getConstant(1, dl, VT), RHS); - SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); - SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, - ISD::SETNE); - Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, - DAG.getConstant(0, dl, N->getValueType(1)), - Overflow); + SDValue LHSHigh, LHSLow, RHSHigh, RHSLow; + SplitInteger(LHS, LHSLow, LHSHigh); + SplitInteger(RHS, RHSLow, RHSHigh); + EVT HalfVT = LHSLow.getValueType() + , BitVT = N->getValueType(1); + SDVTList VTHalfMulO = DAG.getVTList(HalfVT, BitVT); + SDVTList VTFullAddO = DAG.getVTList(VT, BitVT); + + SDValue HalfZero = DAG.getConstant(0, dl, HalfVT); + SDValue Overflow = DAG.getNode(ISD::AND, dl, BitVT, + DAG.getSetCC(dl, BitVT, LHSHigh, HalfZero, ISD::SETNE), + DAG.getSetCC(dl, BitVT, RHSHigh, HalfZero, ISD::SETNE)); + + SDValue One = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, LHSHigh, RHSLow); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, One.getValue(1)); + SDValue OneInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, + One.getValue(0)); + + SDValue Two = DAG.getNode(ISD::UMULO, dl, VTHalfMulO, RHSHigh, LHSLow); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Two.getValue(1)); + SDValue TwoInHigh = DAG.getNode(ISD::BUILD_PAIR, dl, VT, HalfZero, + Two.getValue(0)); + + // Cannot use `UMUL_LOHI` directly, because some 32-bit targets (ARM) do not + // know how to expand `i64,i64 = umul_lohi a, b` and abort (why isn’t this + // operation recursively legalized?). + // + // Many backends understand this pattern and will convert into LOHI + // themselves, if applicable. 
+ SDValue Three = DAG.getNode(ISD::MUL, dl, VT, + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LHSLow), + DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RHSLow)); + SDValue Four = DAG.getNode(ISD::ADD, dl, VT, OneInHigh, TwoInHigh); + SDValue Five = DAG.getNode(ISD::UADDO, dl, VTFullAddO, Three, Four); + Overflow = DAG.getNode(ISD::OR, dl, BitVT, Overflow, Five.getValue(1)); + SplitInteger(Five, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Overflow); return; } Index: llvm/trunk/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu | FileCheck %s --check-prefixes=AARCH + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; AARCH-LABEL: muloti_test: +; AARCH: // %bb.0: // %start +; AARCH-NEXT: mul x8, x3, x0 +; AARCH-NEXT: umulh x9, x0, x2 +; AARCH-NEXT: madd x11, x1, x2, x8 +; AARCH-NEXT: add x8, x9, x11 +; AARCH-NEXT: cmp x8, x9 +; AARCH-NEXT: cset w9, lo +; AARCH-NEXT: cmp x11, #0 // =0 +; AARCH-NEXT: csel w9, wzr, w9, eq +; AARCH-NEXT: cmp x3, #0 // =0 +; AARCH-NEXT: umulh x10, x1, x2 +; AARCH-NEXT: cset w12, ne +; AARCH-NEXT: cmp x1, #0 // =0 +; AARCH-NEXT: umulh x11, x3, x0 +; AARCH-NEXT: cset w13, ne +; AARCH-NEXT: cmp xzr, x10 +; AARCH-NEXT: and w10, w13, w12 +; AARCH-NEXT: cset w12, ne +; AARCH-NEXT: cmp xzr, x11 +; AARCH-NEXT: orr w10, w10, w12 +; AARCH-NEXT: cset w11, ne +; AARCH-NEXT: orr w10, w10, w11 +; AARCH-NEXT: orr w9, w10, w9 +; AARCH-NEXT: mul x0, x0, x2 +; AARCH-NEXT: mov x1, x8 +; AARCH-NEXT: mov w2, w9 +; AARCH-NEXT: ret +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = 
zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/ARM/umulo-128-legalisation-lowering.ll @@ -0,0 +1,210 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=armv6-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV6 +; RUN: llc < %s -mtriple=armv7-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV7 + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; ARMV6-LABEL: muloti_test: +; ARMV6: @ %bb.0: @ %start +; ARMV6-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; ARMV6-NEXT: sub sp, sp, #28 +; ARMV6-NEXT: mov r9, #0 +; ARMV6-NEXT: mov r11, r0 +; ARMV6-NEXT: ldr r7, [sp, #76] +; ARMV6-NEXT: mov r5, r3 +; ARMV6-NEXT: ldr r10, [sp, #72] +; ARMV6-NEXT: mov r1, r3 +; ARMV6-NEXT: mov r6, r2 +; ARMV6-NEXT: mov r0, r2 +; ARMV6-NEXT: mov r2, #0 +; ARMV6-NEXT: mov r3, #0 +; ARMV6-NEXT: str r9, [sp, #12] +; ARMV6-NEXT: str r9, [sp, #8] +; ARMV6-NEXT: str r7, [sp, #4] +; ARMV6-NEXT: str r10, [sp] +; ARMV6-NEXT: bl __multi3 +; ARMV6-NEXT: str r3, [sp, #20] @ 4-byte Spill +; ARMV6-NEXT: str r2, [sp, #16] @ 4-byte Spill +; ARMV6-NEXT: stm r11, {r0, r1} +; ARMV6-NEXT: ldr r0, [sp, #84] +; ARMV6-NEXT: ldr r3, [sp, #80] +; ARMV6-NEXT: ldr r8, [sp, #64] +; ARMV6-NEXT: umull r4, r0, r0, r6 +; ARMV6-NEXT: umull r2, r1, r5, r3 +; ARMV6-NEXT: add r2, r4, r2 +; ARMV6-NEXT: umull lr, r4, r3, r6 +; ARMV6-NEXT: umull r3, r6, r7, r8 +; ARMV6-NEXT: 
adds r12, r4, r2 +; ARMV6-NEXT: adc r2, r9, #0 +; ARMV6-NEXT: str r2, [sp, #24] @ 4-byte Spill +; ARMV6-NEXT: ldr r2, [sp, #68] +; ARMV6-NEXT: umull r4, r2, r2, r10 +; ARMV6-NEXT: add r3, r4, r3 +; ARMV6-NEXT: umull r4, r10, r8, r10 +; ARMV6-NEXT: adds r3, r10, r3 +; ARMV6-NEXT: adc r10, r9, #0 +; ARMV6-NEXT: adds r4, r4, lr +; ARMV6-NEXT: adc r12, r3, r12 +; ARMV6-NEXT: ldr r3, [sp, #16] @ 4-byte Reload +; ARMV6-NEXT: adds r4, r3, r4 +; ARMV6-NEXT: str r4, [r11, #8] +; ARMV6-NEXT: ldr r4, [sp, #20] @ 4-byte Reload +; ARMV6-NEXT: adcs r3, r4, r12 +; ARMV6-NEXT: str r3, [r11, #12] +; ARMV6-NEXT: ldr r3, [sp, #84] +; ARMV6-NEXT: adc r12, r9, #0 +; ARMV6-NEXT: cmp r5, #0 +; ARMV6-NEXT: movne r5, #1 +; ARMV6-NEXT: cmp r3, #0 +; ARMV6-NEXT: mov r4, r3 +; ARMV6-NEXT: movne r4, #1 +; ARMV6-NEXT: cmp r0, #0 +; ARMV6-NEXT: movne r0, #1 +; ARMV6-NEXT: cmp r1, #0 +; ARMV6-NEXT: and r5, r4, r5 +; ARMV6-NEXT: movne r1, #1 +; ARMV6-NEXT: orr r0, r5, r0 +; ARMV6-NEXT: ldr r5, [sp, #68] +; ARMV6-NEXT: orr r0, r0, r1 +; ARMV6-NEXT: ldr r1, [sp, #24] @ 4-byte Reload +; ARMV6-NEXT: cmp r7, #0 +; ARMV6-NEXT: orr r0, r0, r1 +; ARMV6-NEXT: movne r7, #1 +; ARMV6-NEXT: cmp r5, #0 +; ARMV6-NEXT: mov r1, r5 +; ARMV6-NEXT: movne r1, #1 +; ARMV6-NEXT: cmp r2, #0 +; ARMV6-NEXT: movne r2, #1 +; ARMV6-NEXT: and r1, r1, r7 +; ARMV6-NEXT: orr r1, r1, r2 +; ARMV6-NEXT: ldr r2, [sp, #80] +; ARMV6-NEXT: cmp r6, #0 +; ARMV6-NEXT: movne r6, #1 +; ARMV6-NEXT: orrs r2, r2, r3 +; ARMV6-NEXT: orr r1, r1, r6 +; ARMV6-NEXT: movne r2, #1 +; ARMV6-NEXT: orrs r7, r8, r5 +; ARMV6-NEXT: orr r1, r1, r10 +; ARMV6-NEXT: movne r7, #1 +; ARMV6-NEXT: and r2, r7, r2 +; ARMV6-NEXT: orr r1, r2, r1 +; ARMV6-NEXT: orr r0, r1, r0 +; ARMV6-NEXT: orr r0, r0, r12 +; ARMV6-NEXT: and r0, r0, #1 +; ARMV6-NEXT: strb r0, [r11, #16] +; ARMV6-NEXT: add sp, sp, #28 +; ARMV6-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; +; ARMV7-LABEL: muloti_test: +; ARMV7: @ %bb.0: @ %start +; ARMV7-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, 
lr} +; ARMV7-NEXT: sub sp, sp, #44 +; ARMV7-NEXT: str r0, [sp, #40] @ 4-byte Spill +; ARMV7-NEXT: mov r0, #0 +; ARMV7-NEXT: ldr r8, [sp, #88] +; ARMV7-NEXT: mov r5, r3 +; ARMV7-NEXT: ldr r7, [sp, #92] +; ARMV7-NEXT: mov r1, r3 +; ARMV7-NEXT: mov r6, r2 +; ARMV7-NEXT: str r0, [sp, #8] +; ARMV7-NEXT: str r0, [sp, #12] +; ARMV7-NEXT: mov r0, r2 +; ARMV7-NEXT: mov r2, #0 +; ARMV7-NEXT: mov r3, #0 +; ARMV7-NEXT: str r8, [sp] +; ARMV7-NEXT: str r7, [sp, #4] +; ARMV7-NEXT: bl __multi3 +; ARMV7-NEXT: str r1, [sp, #28] @ 4-byte Spill +; ARMV7-NEXT: ldr r1, [sp, #80] +; ARMV7-NEXT: str r2, [sp, #24] @ 4-byte Spill +; ARMV7-NEXT: str r3, [sp, #20] @ 4-byte Spill +; ARMV7-NEXT: umull r2, r9, r7, r1 +; ARMV7-NEXT: str r0, [sp, #32] @ 4-byte Spill +; ARMV7-NEXT: ldr r4, [sp, #84] +; ARMV7-NEXT: ldr r0, [sp, #96] +; ARMV7-NEXT: umull r1, r3, r1, r8 +; ARMV7-NEXT: umull r12, r10, r4, r8 +; ARMV7-NEXT: str r1, [sp, #16] @ 4-byte Spill +; ARMV7-NEXT: umull lr, r1, r5, r0 +; ARMV7-NEXT: add r2, r12, r2 +; ARMV7-NEXT: umull r11, r8, r0, r6 +; ARMV7-NEXT: ldr r0, [sp, #100] +; ARMV7-NEXT: adds r2, r3, r2 +; ARMV7-NEXT: mov r12, #0 +; ARMV7-NEXT: umull r6, r0, r0, r6 +; ARMV7-NEXT: adc r3, r12, #0 +; ARMV7-NEXT: str r3, [sp, #36] @ 4-byte Spill +; ARMV7-NEXT: add r3, r6, lr +; ARMV7-NEXT: ldr r6, [sp, #16] @ 4-byte Reload +; ARMV7-NEXT: adds r3, r8, r3 +; ARMV7-NEXT: adc lr, r12, #0 +; ARMV7-NEXT: adds r6, r6, r11 +; ARMV7-NEXT: adc r2, r2, r3 +; ARMV7-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; ARMV7-NEXT: mov r12, #0 +; ARMV7-NEXT: adds r3, r3, r6 +; ARMV7-NEXT: ldr r6, [sp, #20] @ 4-byte Reload +; ARMV7-NEXT: adcs r8, r6, r2 +; ARMV7-NEXT: ldr r6, [sp, #40] @ 4-byte Reload +; ARMV7-NEXT: ldr r2, [sp, #32] @ 4-byte Reload +; ARMV7-NEXT: str r2, [r6] +; ARMV7-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; ARMV7-NEXT: stmib r6, {r2, r3, r8} +; ARMV7-NEXT: adc r8, r12, #0 +; ARMV7-NEXT: cmp r5, #0 +; ARMV7-NEXT: ldr r2, [sp, #100] +; ARMV7-NEXT: movwne r5, #1 +; ARMV7-NEXT: cmp r2, #0 +; 
ARMV7-NEXT: mov r3, r2 +; ARMV7-NEXT: movwne r3, #1 +; ARMV7-NEXT: cmp r0, #0 +; ARMV7-NEXT: movwne r0, #1 +; ARMV7-NEXT: cmp r1, #0 +; ARMV7-NEXT: and r3, r3, r5 +; ARMV7-NEXT: movwne r1, #1 +; ARMV7-NEXT: orr r0, r3, r0 +; ARMV7-NEXT: cmp r7, #0 +; ARMV7-NEXT: orr r0, r0, r1 +; ARMV7-NEXT: ldr r1, [sp, #80] +; ARMV7-NEXT: movwne r7, #1 +; ARMV7-NEXT: cmp r4, #0 +; ARMV7-NEXT: orr r1, r1, r4 +; ARMV7-NEXT: movwne r4, #1 +; ARMV7-NEXT: cmp r10, #0 +; ARMV7-NEXT: and r3, r4, r7 +; ARMV7-NEXT: movwne r10, #1 +; ARMV7-NEXT: cmp r9, #0 +; ARMV7-NEXT: orr r3, r3, r10 +; ARMV7-NEXT: ldr r7, [sp, #36] @ 4-byte Reload +; ARMV7-NEXT: movwne r9, #1 +; ARMV7-NEXT: orr r3, r3, r9 +; ARMV7-NEXT: orr r3, r3, r7 +; ARMV7-NEXT: ldr r7, [sp, #96] +; ARMV7-NEXT: orr r0, r0, lr +; ARMV7-NEXT: orrs r7, r7, r2 +; ARMV7-NEXT: movwne r7, #1 +; ARMV7-NEXT: cmp r1, #0 +; ARMV7-NEXT: movwne r1, #1 +; ARMV7-NEXT: and r1, r1, r7 +; ARMV7-NEXT: orr r1, r1, r3 +; ARMV7-NEXT: orr r0, r1, r0 +; ARMV7-NEXT: orr r0, r0, r8 +; ARMV7-NEXT: and r0, r0, #1 +; ARMV7-NEXT: strb r0, [r6, #16] +; ARMV7-NEXT: add sp, sp, #44 +; ARMV7-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/ARM/umulo-64-legalisation-lowering.ll @@ -0,0 
+1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=armv6-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV6 +; RUN: llc < %s -mtriple=armv7-unknown-linux-gnu | FileCheck %s --check-prefixes=ARMV7 + +define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 { +; ARMV6-LABEL: mulodi_test: +; ARMV6: @ %bb.0: @ %start +; ARMV6-NEXT: push {r4, r5, r6, lr} +; ARMV6-NEXT: umull r12, lr, r3, r0 +; ARMV6-NEXT: mov r6, #0 +; ARMV6-NEXT: umull r4, r5, r1, r2 +; ARMV6-NEXT: umull r0, r2, r0, r2 +; ARMV6-NEXT: add r4, r4, r12 +; ARMV6-NEXT: adds r12, r2, r4 +; ARMV6-NEXT: adc r2, r6, #0 +; ARMV6-NEXT: cmp r3, #0 +; ARMV6-NEXT: movne r3, #1 +; ARMV6-NEXT: cmp r1, #0 +; ARMV6-NEXT: movne r1, #1 +; ARMV6-NEXT: cmp r5, #0 +; ARMV6-NEXT: and r1, r1, r3 +; ARMV6-NEXT: movne r5, #1 +; ARMV6-NEXT: cmp lr, #0 +; ARMV6-NEXT: orr r1, r1, r5 +; ARMV6-NEXT: movne lr, #1 +; ARMV6-NEXT: orr r1, r1, lr +; ARMV6-NEXT: orr r2, r1, r2 +; ARMV6-NEXT: mov r1, r12 +; ARMV6-NEXT: pop {r4, r5, r6, pc} +; +; ARMV7-LABEL: mulodi_test: +; ARMV7: @ %bb.0: @ %start +; ARMV7-NEXT: push {r4, r5, r11, lr} +; ARMV7-NEXT: umull r12, lr, r1, r2 +; ARMV7-NEXT: cmp r3, #0 +; ARMV7-NEXT: umull r4, r5, r3, r0 +; ARMV7-NEXT: movwne r3, #1 +; ARMV7-NEXT: cmp r1, #0 +; ARMV7-NEXT: movwne r1, #1 +; ARMV7-NEXT: umull r0, r2, r0, r2 +; ARMV7-NEXT: cmp lr, #0 +; ARMV7-NEXT: and r1, r1, r3 +; ARMV7-NEXT: movwne lr, #1 +; ARMV7-NEXT: cmp r5, #0 +; ARMV7-NEXT: orr r1, r1, lr +; ARMV7-NEXT: movwne r5, #1 +; ARMV7-NEXT: orr r3, r1, r5 +; ARMV7-NEXT: add r1, r12, r4 +; ARMV7-NEXT: mov r5, #0 +; ARMV7-NEXT: adds r1, r2, r1 +; ARMV7-NEXT: adc r2, r5, #0 +; ARMV7-NEXT: orr r2, r3, r2 +; ARMV7-NEXT: pop {r4, r5, r11, pc} +start: + %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2 + %1 = extractvalue { i64, i1 } %0, 0 + %2 = extractvalue { i64, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i64, i8 } undef, i64 %1, 0 + %5 = 
insertvalue { i64, i8 } %4, i8 %3, 1 + ret { i64, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/PowerPC/umulo-128-legalisation-lowering.ll @@ -0,0 +1,177 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC64 +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu | FileCheck %s --check-prefixes=PPC32 + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { + +; PPC64-LABEL: muloti_test: +; PPC64: mulld 8, 5, 4 +; PPC64-NEXT: cmpdi 5, 3, 0 +; PPC64-NEXT: mulhdu. 9, 3, 6 +; PPC64-NEXT: mulld 3, 3, 6 +; PPC64-NEXT: mcrf 1, 0 +; PPC64-NEXT: add 3, 3, 8 +; PPC64-NEXT: cmpdi 5, 0 +; PPC64-NEXT: crnor 20, 2, 22 +; PPC64-NEXT: cmpldi 3, 0 +; PPC64-NEXT: mulhdu 8, 4, 6 +; PPC64-NEXT: add 3, 8, 3 +; PPC64-NEXT: cmpld 6, 3, 8 +; PPC64-NEXT: crandc 21, 24, 2 +; PPC64-NEXT: crorc 20, 20, 6 +; PPC64-NEXT: li 7, 1 +; PPC64-NEXT: mulhdu.
5, 5, 4 +; PPC64-NEXT: crorc 20, 20, 2 +; PPC64-NEXT: crnor 20, 20, 21 +; PPC64-NEXT: mulld 4, 4, 6 +; PPC64-NEXT: bc 12, 20, .LBB0_2 +; PPC64: ori 5, 7, 0 +; PPC64-NEXT: blr +; PPC64-NEXT: .LBB0_2: +; PPC64-NEXT: addi 5, 0, 0 +; PPC64-NEXT: blr +; +; PPC32-LABEL: muloti_test: +; PPC32: mflr 0 +; PPC32-NEXT: stw 0, 4(1) +; PPC32-NEXT: stwu 1, -80(1) +; PPC32-NEXT: .cfi_def_cfa_offset 80 +; PPC32-NEXT: .cfi_offset lr, 4 +; PPC32-NEXT: .cfi_offset r20, -48 +; PPC32-NEXT: .cfi_offset r21, -44 +; PPC32-NEXT: .cfi_offset r22, -40 +; PPC32-NEXT: .cfi_offset r23, -36 +; PPC32-NEXT: .cfi_offset r24, -32 +; PPC32-NEXT: .cfi_offset r25, -28 +; PPC32-NEXT: .cfi_offset r26, -24 +; PPC32-NEXT: .cfi_offset r27, -20 +; PPC32-NEXT: .cfi_offset r28, -16 +; PPC32-NEXT: .cfi_offset r29, -12 +; PPC32-NEXT: .cfi_offset r30, -8 +; PPC32-NEXT: stw 26, 56(1) +; PPC32-NEXT: stw 27, 60(1) +; PPC32-NEXT: stw 29, 68(1) +; PPC32-NEXT: stw 30, 72(1) +; PPC32-NEXT: mfcr 12 +; PPC32-NEXT: mr 30, 8 +; PPC32-NEXT: mr 29, 7 +; PPC32-NEXT: mr 27, 4 +; PPC32-NEXT: mr 26, 3 +; PPC32-NEXT: li 3, 0 +; PPC32-NEXT: li 4, 0 +; PPC32-NEXT: li 7, 0 +; PPC32-NEXT: li 8, 0 +; PPC32-NEXT: stw 20, 32(1) +; PPC32-NEXT: stw 21, 36(1) +; PPC32-NEXT: stw 22, 40(1) +; PPC32-NEXT: stw 23, 44(1) +; PPC32-NEXT: stw 24, 48(1) +; PPC32-NEXT: stw 25, 52(1) +; PPC32-NEXT: stw 28, 64(1) +; PPC32-NEXT: mr 25, 10 +; PPC32-NEXT: stw 12, 28(1) +; PPC32-NEXT: mr 28, 9 +; PPC32-NEXT: mr 23, 6 +; PPC32-NEXT: mr 24, 5 +; PPC32-NEXT: bl __multi3@PLT +; PPC32-NEXT: mr 7, 4 +; PPC32-NEXT: mullw 4, 24, 30 +; PPC32-NEXT: mullw 8, 29, 23 +; PPC32-NEXT: mullw 10, 28, 27 +; PPC32-NEXT: mullw 11, 26, 25 +; PPC32-NEXT: mulhwu 9, 30, 23 +; PPC32-NEXT: mulhwu 12, 27, 25 +; PPC32-NEXT: mullw 0, 30, 23 +; PPC32-NEXT: mullw 22, 27, 25 +; PPC32-NEXT: add 21, 8, 4 +; PPC32-NEXT: add 10, 11, 10 +; PPC32-NEXT: addc 4, 22, 0 +; PPC32-NEXT: add 11, 9, 21 +; PPC32-NEXT: add 0, 12, 10 +; PPC32-NEXT: adde 8, 0, 11 +; PPC32-NEXT: addc 4, 7, 4 +; PPC32-NEXT:
adde 8, 3, 8 +; PPC32-NEXT: xor 22, 4, 7 +; PPC32-NEXT: xor 20, 8, 3 +; PPC32-NEXT: or. 22, 22, 20 +; PPC32-NEXT: mcrf 1, 0 +; PPC32-NEXT: cmpwi 29, 0 +; PPC32-NEXT: cmpwi 5, 24, 0 +; PPC32-NEXT: cmpwi 6, 26, 0 +; PPC32-NEXT: cmpwi 7, 28, 0 +; PPC32-NEXT: crnor 8, 22, 2 +; PPC32-NEXT: mulhwu. 23, 29, 23 +; PPC32-NEXT: crnor 9, 30, 26 +; PPC32-NEXT: mcrf 5, 0 +; PPC32-NEXT: cmplwi 21, 0 +; PPC32-NEXT: cmplw 6, 11, 9 +; PPC32-NEXT: cmplwi 7, 10, 0 +; PPC32-NEXT: crandc 10, 24, 2 +; PPC32-NEXT: cmplw 3, 0, 12 +; PPC32-NEXT: mulhwu. 9, 24, 30 +; PPC32-NEXT: mcrf 6, 0 +; PPC32-NEXT: crandc 11, 12, 30 +; PPC32-NEXT: cmplw 4, 7 +; PPC32-NEXT: cmplw 7, 8, 3 +; PPC32-NEXT: crand 12, 30, 0 +; PPC32-NEXT: crandc 13, 28, 30 +; PPC32-NEXT: mulhwu. 3, 26, 25 +; PPC32-NEXT: mcrf 7, 0 +; PPC32-NEXT: cror 0, 12, 13 +; PPC32-NEXT: crandc 12, 0, 6 +; PPC32-NEXT: crorc 20, 8, 22 +; PPC32-NEXT: crorc 20, 20, 26 +; PPC32-NEXT: mulhwu. 3, 28, 27 +; PPC32-NEXT: mcrf 1, 0 +; PPC32-NEXT: crorc 25, 9, 30 +; PPC32-NEXT: or. 3, 27, 26 +; PPC32-NEXT: cror 24, 20, 10 +; PPC32-NEXT: mcrf 5, 0 +; PPC32-NEXT: crorc 25, 25, 6 +; PPC32-NEXT: or. 
3, 30, 29 +; PPC32-NEXT: cror 25, 25, 11 +; PPC32-NEXT: crnor 20, 2, 22 +; PPC32-NEXT: lwz 12, 28(1) +; PPC32-NEXT: cror 20, 20, 25 +; PPC32-NEXT: cror 20, 20, 24 +; PPC32-NEXT: crnor 20, 20, 12 +; PPC32-NEXT: li 3, 1 +; PPC32-NEXT: bc 12, 20, .LBB0_2 +; PPC32: ori 7, 3, 0 +; PPC32-NEXT: b .LBB0_3 +; PPC32-NEXT:.LBB0_2: +; PPC32-NEXT: addi 7, 0, 0 +; PPC32-NEXT:.LBB0_3: +; PPC32-NEXT: mr 3, 8 +; PPC32-NEXT: mtcrf 32, 12 +; PPC32-NEXT: mtcrf 16, 12 +; PPC32-NEXT: lwz 30, 72(1) +; PPC32-NEXT: lwz 29, 68(1) +; PPC32-NEXT: lwz 28, 64(1) +; PPC32-NEXT: lwz 27, 60(1) +; PPC32-NEXT: lwz 26, 56(1) +; PPC32-NEXT: lwz 25, 52(1) +; PPC32-NEXT: lwz 24, 48(1) +; PPC32-NEXT: lwz 23, 44(1) +; PPC32-NEXT: lwz 22, 40(1) +; PPC32-NEXT: lwz 21, 36(1) +; PPC32-NEXT: lwz 20, 32(1) +; PPC32-NEXT: lwz 0, 84(1) +; PPC32-NEXT: addi 1, 1, 80 +; PPC32-NEXT: mtlr 0 +; PPC32-NEXT: blr +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefixes=RISCV32 +; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefixes=RISCV64 + +define { i128, i8 } 
@muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; RISCV32-LABEL: muloti_test: +; RISCV32: # %bb.0: # %start +; RISCV32-NEXT: addi sp, sp, -80 +; RISCV32-NEXT: sw ra, 76(sp) +; RISCV32-NEXT: sw s1, 72(sp) +; RISCV32-NEXT: sw s2, 68(sp) +; RISCV32-NEXT: sw s3, 64(sp) +; RISCV32-NEXT: sw s4, 60(sp) +; RISCV32-NEXT: sw s5, 56(sp) +; RISCV32-NEXT: sw s6, 52(sp) +; RISCV32-NEXT: sw s7, 48(sp) +; RISCV32-NEXT: mv s3, a2 +; RISCV32-NEXT: mv s1, a1 +; RISCV32-NEXT: mv s2, a0 +; RISCV32-NEXT: sw zero, 12(sp) +; RISCV32-NEXT: sw zero, 8(sp) +; RISCV32-NEXT: sw zero, 28(sp) +; RISCV32-NEXT: sw zero, 24(sp) +; RISCV32-NEXT: lw s5, 4(a2) +; RISCV32-NEXT: sw s5, 4(sp) +; RISCV32-NEXT: lw s6, 0(a2) +; RISCV32-NEXT: sw s6, 0(sp) +; RISCV32-NEXT: lw s4, 4(a1) +; RISCV32-NEXT: sw s4, 20(sp) +; RISCV32-NEXT: lw s7, 0(a1) +; RISCV32-NEXT: sw s7, 16(sp) +; RISCV32-NEXT: addi a0, sp, 32 +; RISCV32-NEXT: addi a1, sp, 16 +; RISCV32-NEXT: mv a2, sp +; RISCV32-NEXT: call __multi3 +; RISCV32-NEXT: lw t1, 12(s1) +; RISCV32-NEXT: lw a1, 8(s1) +; RISCV32-NEXT: mul a0, s5, a1 +; RISCV32-NEXT: mul a2, t1, s6 +; RISCV32-NEXT: add a0, a2, a0 +; RISCV32-NEXT: lw t5, 12(s3) +; RISCV32-NEXT: lw a3, 8(s3) +; RISCV32-NEXT: mul a2, s4, a3 +; RISCV32-NEXT: mul a4, t5, s7 +; RISCV32-NEXT: add a2, a4, a2 +; RISCV32-NEXT: mul a4, a3, s7 +; RISCV32-NEXT: mul a5, a1, s6 +; RISCV32-NEXT: add s1, a5, a4 +; RISCV32-NEXT: sltu a4, s1, a5 +; RISCV32-NEXT: mulhu a6, a3, s7 +; RISCV32-NEXT: add a7, a6, a2 +; RISCV32-NEXT: mulhu t2, a1, s6 +; RISCV32-NEXT: add t4, t2, a0 +; RISCV32-NEXT: add a0, t4, a7 +; RISCV32-NEXT: add a0, a0, a4 +; RISCV32-NEXT: xor a2, s5, zero +; RISCV32-NEXT: snez a2, a2 +; RISCV32-NEXT: xor a4, t1, zero +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: and a2, a4, a2 +; RISCV32-NEXT: xor a4, s4, zero +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: xor a5, t5, zero +; RISCV32-NEXT: snez a5, a5 +; RISCV32-NEXT: and a4, a5, a4 +; RISCV32-NEXT: mulhu a5, t5, s7 +; RISCV32-NEXT: xor a5, a5, zero +; 
RISCV32-NEXT: snez a5, a5 +; RISCV32-NEXT: or t0, a4, a5 +; RISCV32-NEXT: mulhu a4, t1, s6 +; RISCV32-NEXT: xor a4, a4, zero +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: or t3, a2, a4 +; RISCV32-NEXT: lw a4, 44(sp) +; RISCV32-NEXT: add a5, a4, a0 +; RISCV32-NEXT: lw a2, 40(sp) +; RISCV32-NEXT: add a0, a2, s1 +; RISCV32-NEXT: sltu t6, a0, a2 +; RISCV32-NEXT: add s1, a5, t6 +; RISCV32-NEXT: beq s1, a4, .LBB0_2 +; RISCV32-NEXT: # %bb.1: # %start +; RISCV32-NEXT: sltu t6, s1, a4 +; RISCV32-NEXT: .LBB0_2: # %start +; RISCV32-NEXT: xor a4, s1, a4 +; RISCV32-NEXT: xor a2, a0, a2 +; RISCV32-NEXT: or a2, a2, a4 +; RISCV32-NEXT: sltu t2, t4, t2 +; RISCV32-NEXT: mulhu a4, s5, a1 +; RISCV32-NEXT: xor a4, a4, zero +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: or t3, t3, a4 +; RISCV32-NEXT: sltu a6, a7, a6 +; RISCV32-NEXT: mulhu a4, s4, a3 +; RISCV32-NEXT: xor a4, a4, zero +; RISCV32-NEXT: snez a4, a4 +; RISCV32-NEXT: or a4, t0, a4 +; RISCV32-NEXT: lw a5, 36(sp) +; RISCV32-NEXT: sw a5, 4(s2) +; RISCV32-NEXT: lw a5, 32(sp) +; RISCV32-NEXT: sw a5, 0(s2) +; RISCV32-NEXT: sw a0, 8(s2) +; RISCV32-NEXT: sw s1, 12(s2) +; RISCV32-NEXT: mv a0, zero +; RISCV32-NEXT: beqz a2, .LBB0_4 +; RISCV32-NEXT: # %bb.3: # %start +; RISCV32-NEXT: mv a0, t6 +; RISCV32-NEXT: .LBB0_4: # %start +; RISCV32-NEXT: or a2, a4, a6 +; RISCV32-NEXT: or a4, t3, t2 +; RISCV32-NEXT: or a3, a3, t5 +; RISCV32-NEXT: or a1, a1, t1 +; RISCV32-NEXT: xor a1, a1, zero +; RISCV32-NEXT: xor a3, a3, zero +; RISCV32-NEXT: snez a3, a3 +; RISCV32-NEXT: snez a1, a1 +; RISCV32-NEXT: and a1, a1, a3 +; RISCV32-NEXT: or a1, a1, a4 +; RISCV32-NEXT: or a1, a1, a2 +; RISCV32-NEXT: or a0, a1, a0 +; RISCV32-NEXT: andi a0, a0, 1 +; RISCV32-NEXT: sb a0, 16(s2) +; RISCV32-NEXT: lw s7, 48(sp) +; RISCV32-NEXT: lw s6, 52(sp) +; RISCV32-NEXT: lw s5, 56(sp) +; RISCV32-NEXT: lw s4, 60(sp) +; RISCV32-NEXT: lw s3, 64(sp) +; RISCV32-NEXT: lw s2, 68(sp) +; RISCV32-NEXT: lw s1, 72(sp) +; RISCV32-NEXT: lw ra, 76(sp) +; RISCV32-NEXT: addi sp, sp, 80 +; 
RISCV32-NEXT: ret +; +; RISCV64-LABEL: muloti_test: +; RISCV64: # %bb.0: # %start +; RISCV64-NEXT: mul a6, a4, a1 +; RISCV64-NEXT: mul a5, a2, a3 +; RISCV64-NEXT: add a6, a5, a6 +; RISCV64-NEXT: mul a5, a1, a3 +; RISCV64-NEXT: sw a5, 0(a0) +; RISCV64-NEXT: mulhu a7, a1, a3 +; RISCV64-NEXT: add a5, a7, a6 +; RISCV64-NEXT: sw a5, 8(a0) +; RISCV64-NEXT: sltu a6, a5, a7 +; RISCV64-NEXT: xor a5, a4, zero +; RISCV64-NEXT: snez a7, a5 +; RISCV64-NEXT: xor a5, a2, zero +; RISCV64-NEXT: snez a5, a5 +; RISCV64-NEXT: and a5, a5, a7 +; RISCV64-NEXT: mulhu a2, a2, a3 +; RISCV64-NEXT: xor a2, a2, zero +; RISCV64-NEXT: snez a2, a2 +; RISCV64-NEXT: or a2, a5, a2 +; RISCV64-NEXT: mulhu a1, a4, a1 +; RISCV64-NEXT: xor a1, a1, zero +; RISCV64-NEXT: snez a1, a1 +; RISCV64-NEXT: or a1, a2, a1 +; RISCV64-NEXT: or a1, a1, a6 +; RISCV64-NEXT: sb a1, 16(a0) +; RISCV64-NEXT: ret +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/SPARC/umulo-128-legalisation-lowering.ll @@ -0,0 +1,259 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC +; RUN: llc < %s -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64 + +define { i128, 
i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; SPARC-LABEL: muloti_test: +; SPARC: .cfi_startproc +; SPARC-NEXT: ! %bb.0: ! %start +; SPARC-NEXT: save %sp, -128, %sp +; SPARC-NEXT: .cfi_def_cfa_register %fp +; SPARC-NEXT: .cfi_window_save +; SPARC-NEXT: .cfi_register 15, 31 +; SPARC-NEXT: ld [%fp+92], %l3 +; SPARC-NEXT: ld [%fp+96], %g2 +; SPARC-NEXT: umul %i2, %i5, %g3 +; SPARC-NEXT: rd %y, %g4 +; SPARC-NEXT: st %g4, [%fp+-20] ! 4-byte Folded Spill +; SPARC-NEXT: umul %i4, %i3, %g4 +; SPARC-NEXT: rd %y, %l0 +; SPARC-NEXT: st %l0, [%fp+-24] ! 4-byte Folded Spill +; SPARC-NEXT: st %g2, [%sp+96] +; SPARC-NEXT: st %i5, [%fp+-8] ! 4-byte Folded Spill +; SPARC-NEXT: umul %i5, %i3, %l0 +; SPARC-NEXT: rd %y, %l5 +; SPARC-NEXT: st %l3, [%sp+92] +; SPARC-NEXT: umul %l3, %i1, %l4 +; SPARC-NEXT: rd %y, %i5 +; SPARC-NEXT: st %i5, [%fp+-12] ! 4-byte Folded Spill +; SPARC-NEXT: add %g4, %g3, %l2 +; SPARC-NEXT: mov %i0, %i5 +; SPARC-NEXT: umul %i0, %g2, %g3 +; SPARC-NEXT: rd %y, %i0 +; SPARC-NEXT: st %i0, [%fp+-16] ! 4-byte Folded Spill +; SPARC-NEXT: add %l5, %l2, %l1 +; SPARC-NEXT: st %i1, [%fp+-4] ! 4-byte Folded Spill +; SPARC-NEXT: umul %i1, %g2, %g2 +; SPARC-NEXT: rd %y, %l6 +; SPARC-NEXT: add %g3, %l4, %i0 +; SPARC-NEXT: add %l6, %i0, %l7 +; SPARC-NEXT: addcc %g2, %l0, %l4 +; SPARC-NEXT: mov %g0, %l0 +; SPARC-NEXT: addxcc %l7, %l1, %i1 +; SPARC-NEXT: mov %l0, %o0 +; SPARC-NEXT: mov %l0, %o1 +; SPARC-NEXT: mov %i2, %o2 +; SPARC-NEXT: mov %i3, %o3 +; SPARC-NEXT: mov %l0, %o4 +; SPARC-NEXT: call __multi3 +; SPARC-NEXT: mov %l0, %o5 +; SPARC-NEXT: addcc %o1, %l4, %i3 +; SPARC-NEXT: addxcc %o0, %i1, %g2 +; SPARC-NEXT: mov 1, %g3 +; SPARC-NEXT: cmp %g2, %o0 +; SPARC-NEXT: bcs .LBB0_2 +; SPARC-NEXT: mov %g3, %g4 +; SPARC-NEXT: ! %bb.1: ! %start +; SPARC-NEXT: mov %l0, %g4 +; SPARC-NEXT: .LBB0_2: ! %start +; SPARC-NEXT: cmp %i3, %o1 +; SPARC-NEXT: bcs .LBB0_4 +; SPARC-NEXT: mov %g3, %o4 +; SPARC-NEXT: ! %bb.3: ! 
%start +; SPARC-NEXT: mov %l0, %o4 +; SPARC-NEXT: .LBB0_4: ! %start +; SPARC-NEXT: cmp %g2, %o0 +; SPARC-NEXT: be .LBB0_6 +; SPARC-NEXT: nop +; SPARC-NEXT: ! %bb.5: ! %start +; SPARC-NEXT: mov %g4, %o4 +; SPARC-NEXT: .LBB0_6: ! %start +; SPARC-NEXT: xor %g2, %o0, %i1 +; SPARC-NEXT: xor %i3, %o1, %g4 +; SPARC-NEXT: or %g4, %i1, %i1 +; SPARC-NEXT: cmp %i1, 0 +; SPARC-NEXT: be .LBB0_8 +; SPARC-NEXT: mov %l0, %g4 +; SPARC-NEXT: ! %bb.7: ! %start +; SPARC-NEXT: mov %o4, %g4 +; SPARC-NEXT: .LBB0_8: ! %start +; SPARC-NEXT: cmp %l1, %l5 +; SPARC-NEXT: mov %g3, %l1 +; SPARC-NEXT: bcs .LBB0_10 +; SPARC-NEXT: mov %i5, %i1 +; SPARC-NEXT: ! %bb.9: ! %start +; SPARC-NEXT: mov %l0, %l1 +; SPARC-NEXT: .LBB0_10: ! %start +; SPARC-NEXT: cmp %l2, 0 +; SPARC-NEXT: be .LBB0_12 +; SPARC-NEXT: mov %l0, %o0 +; SPARC-NEXT: ! %bb.11: ! %start +; SPARC-NEXT: mov %l1, %o0 +; SPARC-NEXT: .LBB0_12: ! %start +; SPARC-NEXT: cmp %i2, 0 +; SPARC-NEXT: bne .LBB0_14 +; SPARC-NEXT: mov %g3, %i2 +; SPARC-NEXT: ! %bb.13: ! %start +; SPARC-NEXT: mov %l0, %i2 +; SPARC-NEXT: .LBB0_14: ! %start +; SPARC-NEXT: cmp %i4, 0 +; SPARC-NEXT: bne .LBB0_16 +; SPARC-NEXT: mov %g3, %o1 +; SPARC-NEXT: ! %bb.15: ! %start +; SPARC-NEXT: mov %l0, %o1 +; SPARC-NEXT: .LBB0_16: ! %start +; SPARC-NEXT: ld [%fp+-24], %i5 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %i5, 0 +; SPARC-NEXT: bne .LBB0_18 +; SPARC-NEXT: mov %g3, %l5 +; SPARC-NEXT: ! %bb.17: ! %start +; SPARC-NEXT: mov %l0, %l5 +; SPARC-NEXT: .LBB0_18: ! %start +; SPARC-NEXT: ld [%fp+-20], %i5 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %i5, 0 +; SPARC-NEXT: bne .LBB0_20 +; SPARC-NEXT: mov %g3, %l1 +; SPARC-NEXT: ! %bb.19: ! %start +; SPARC-NEXT: mov %l0, %l1 +; SPARC-NEXT: .LBB0_20: ! %start +; SPARC-NEXT: cmp %l7, %l6 +; SPARC-NEXT: bcs .LBB0_22 +; SPARC-NEXT: mov %g3, %l6 +; SPARC-NEXT: ! %bb.21: ! %start +; SPARC-NEXT: mov %l0, %l6 +; SPARC-NEXT: .LBB0_22: ! %start +; SPARC-NEXT: cmp %i0, 0 +; SPARC-NEXT: be .LBB0_24 +; SPARC-NEXT: mov %l0, %l2 +; SPARC-NEXT: ! 
%bb.23: ! %start +; SPARC-NEXT: mov %l6, %l2 +; SPARC-NEXT: .LBB0_24: ! %start +; SPARC-NEXT: cmp %l3, 0 +; SPARC-NEXT: bne .LBB0_26 +; SPARC-NEXT: mov %g3, %l3 +; SPARC-NEXT: ! %bb.25: ! %start +; SPARC-NEXT: mov %l0, %l3 +; SPARC-NEXT: .LBB0_26: ! %start +; SPARC-NEXT: cmp %i1, 0 +; SPARC-NEXT: bne .LBB0_28 +; SPARC-NEXT: mov %g3, %l4 +; SPARC-NEXT: ! %bb.27: ! %start +; SPARC-NEXT: mov %l0, %l4 +; SPARC-NEXT: .LBB0_28: ! %start +; SPARC-NEXT: and %o1, %i2, %i2 +; SPARC-NEXT: ld [%fp+-16], %i0 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %i0, 0 +; SPARC-NEXT: and %l4, %l3, %l4 +; SPARC-NEXT: bne .LBB0_30 +; SPARC-NEXT: mov %g3, %l6 +; SPARC-NEXT: ! %bb.29: ! %start +; SPARC-NEXT: mov %l0, %l6 +; SPARC-NEXT: .LBB0_30: ! %start +; SPARC-NEXT: or %i2, %l5, %l3 +; SPARC-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload +; SPARC-NEXT: cmp %i0, 0 +; SPARC-NEXT: or %l4, %l6, %i2 +; SPARC-NEXT: bne .LBB0_32 +; SPARC-NEXT: mov %g3, %l4 +; SPARC-NEXT: ! %bb.31: ! %start +; SPARC-NEXT: mov %l0, %l4 +; SPARC-NEXT: .LBB0_32: ! %start +; SPARC-NEXT: or %l3, %l1, %l1 +; SPARC-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload +; SPARC-NEXT: or %i0, %i4, %i0 +; SPARC-NEXT: cmp %i0, 0 +; SPARC-NEXT: or %i2, %l4, %i5 +; SPARC-NEXT: bne .LBB0_34 +; SPARC-NEXT: mov %g3, %i2 +; SPARC-NEXT: ! %bb.33: ! %start +; SPARC-NEXT: mov %l0, %i2 +; SPARC-NEXT: .LBB0_34: ! %start +; SPARC-NEXT: or %l1, %o0, %i4 +; SPARC-NEXT: ld [%fp+-4], %i0 ! 4-byte Folded Reload +; SPARC-NEXT: or %i0, %i1, %i0 +; SPARC-NEXT: cmp %i0, 0 +; SPARC-NEXT: bne .LBB0_36 +; SPARC-NEXT: or %i5, %l2, %i0 +; SPARC-NEXT: ! %bb.35: ! %start +; SPARC-NEXT: mov %l0, %g3 +; SPARC-NEXT: .LBB0_36: ! 
%start +; SPARC-NEXT: and %g3, %i2, %i1 +; SPARC-NEXT: or %i1, %i0, %i0 +; SPARC-NEXT: or %i0, %i4, %i0 +; SPARC-NEXT: or %i0, %g4, %i0 +; SPARC-NEXT: and %i0, 1, %i4 +; SPARC-NEXT: mov %g2, %i0 +; SPARC-NEXT: mov %i3, %i1 +; SPARC-NEXT: mov %o2, %i2 +; SPARC-NEXT: ret +; SPARC-NEXT: restore %g0, %o3, %o3 +; +; SPARC64-LABEL: muloti_test: +; SPARC64: .cfi_startproc +; SPARC64-NEXT: .register %g2, #scratch +; SPARC64-NEXT: ! %bb.0: ! %start +; SPARC64-NEXT: save %sp, -176, %sp +; SPARC64-NEXT: .cfi_def_cfa_register %fp +; SPARC64-NEXT: .cfi_window_save +; SPARC64-NEXT: .cfi_register 15, 31 +; SPARC64-NEXT: srax %i2, 63, %o0 +; SPARC64-NEXT: srax %i1, 63, %o2 +; SPARC64-NEXT: mov %i2, %o1 +; SPARC64-NEXT: call __multi3 +; SPARC64-NEXT: mov %i1, %o3 +; SPARC64-NEXT: mov %o0, %i4 +; SPARC64-NEXT: mov %o1, %i5 +; SPARC64-NEXT: srax %i0, 63, %o0 +; SPARC64-NEXT: srax %i3, 63, %o2 +; SPARC64-NEXT: mov %i0, %o1 +; SPARC64-NEXT: call __multi3 +; SPARC64-NEXT: mov %i3, %o3 +; SPARC64-NEXT: mov %o0, %l0 +; SPARC64-NEXT: add %o1, %i5, %i5 +; SPARC64-NEXT: mov 0, %o0 +; SPARC64-NEXT: mov %i1, %o1 +; SPARC64-NEXT: mov %o0, %o2 +; SPARC64-NEXT: call __multi3 +; SPARC64-NEXT: mov %i3, %o3 +; SPARC64-NEXT: add %o0, %i5, %i1 +; SPARC64-NEXT: mov %g0, %i3 +; SPARC64-NEXT: cmp %i1, %o0 +; SPARC64-NEXT: mov %i3, %g2 +; SPARC64-NEXT: movcs %xcc, 1, %g2 +; SPARC64-NEXT: cmp %i5, 0 +; SPARC64-NEXT: move %xcc, 0, %g2 +; SPARC64-NEXT: cmp %i4, 0 +; SPARC64-NEXT: mov %i3, %i4 +; SPARC64-NEXT: movne %xcc, 1, %i4 +; SPARC64-NEXT: cmp %l0, 0 +; SPARC64-NEXT: mov %i3, %i5 +; SPARC64-NEXT: movne %xcc, 1, %i5 +; SPARC64-NEXT: cmp %i2, 0 +; SPARC64-NEXT: mov %i3, %i2 +; SPARC64-NEXT: movne %xcc, 1, %i2 +; SPARC64-NEXT: cmp %i0, 0 +; SPARC64-NEXT: movne %xcc, 1, %i3 +; SPARC64-NEXT: and %i3, %i2, %i0 +; SPARC64-NEXT: or %i0, %i5, %i0 +; SPARC64-NEXT: or %i0, %i4, %i0 +; SPARC64-NEXT: or %i0, %g2, %i0 +; SPARC64-NEXT: srl %i0, 0, %i2 +; SPARC64-NEXT: mov %i1, %i0 +; SPARC64-NEXT: ret +; SPARC64-NEXT: 
restore %g0, %o1, %o1 +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/Thumb/umulo-128-legalisation-lowering.ll @@ -0,0 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumb-eabi -mattr=+v6 | FileCheck %s --check-prefixes=THUMBV6 + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; THUMBV6-LABEL: muloti_test: +; THUMBV6: push {r4, r5, r6, r7, lr} +; THUMBV6: sub sp, #84 +; THUMBV6-NEXT: mov r6, r3 +; THUMBV6-NEXT: mov r7, r2 +; THUMBV6-NEXT: mov r4, r0 +; THUMBV6-NEXT: movs r5, #0 +; THUMBV6-NEXT: mov r0, sp +; THUMBV6-NEXT: str r5, [r0, #12] +; THUMBV6-NEXT: str r5, [r0, #8] +; THUMBV6-NEXT: ldr r1, [sp, #116] +; THUMBV6-NEXT: str r1, [sp, #68] @ 4-byte Spill +; THUMBV6-NEXT: str r1, [r0, #4] +; THUMBV6-NEXT: ldr r1, [sp, #112] +; THUMBV6-NEXT: str r1, [sp, #32] @ 4-byte Spill +; THUMBV6-NEXT: str r1, [r0] +; THUMBV6-NEXT: mov r0, r2 +; THUMBV6-NEXT: mov r1, r3 +; THUMBV6-NEXT: mov r2, r5 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __multi3 +; THUMBV6-NEXT: str r2, [sp, #40] @ 4-byte Spill +; THUMBV6-NEXT: str r3, [sp, #44] @ 4-byte Spill +; THUMBV6-NEXT: str r4, [sp, #72] @ 4-byte Spill +; THUMBV6-NEXT: stm r4!, {r0, r1} +; THUMBV6-NEXT: 
ldr r4, [sp, #120] +; THUMBV6-NEXT: str r6, [sp, #60] @ 4-byte Spill +; THUMBV6-NEXT: mov r0, r6 +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: mov r2, r4 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __aeabi_lmul +; THUMBV6-NEXT: mov r6, r0 +; THUMBV6-NEXT: str r1, [sp, #52] @ 4-byte Spill +; THUMBV6-NEXT: ldr r0, [sp, #124] +; THUMBV6-NEXT: str r0, [sp, #80] @ 4-byte Spill +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: mov r2, r7 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __aeabi_lmul +; THUMBV6-NEXT: str r1, [sp, #28] @ 4-byte Spill +; THUMBV6-NEXT: adds r6, r0, r6 +; THUMBV6-NEXT: str r4, [sp, #64] @ 4-byte Spill +; THUMBV6-NEXT: mov r0, r4 +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: mov r2, r7 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __aeabi_lmul +; THUMBV6-NEXT: str r0, [sp, #24] @ 4-byte Spill +; THUMBV6-NEXT: adds r0, r1, r6 +; THUMBV6-NEXT: str r0, [sp, #20] @ 4-byte Spill +; THUMBV6-NEXT: mov r0, r5 +; THUMBV6-NEXT: adcs r0, r5 +; THUMBV6-NEXT: str r0, [sp, #48] @ 4-byte Spill +; THUMBV6-NEXT: ldr r7, [sp, #104] +; THUMBV6-NEXT: ldr r0, [sp, #68] @ 4-byte Reload +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: mov r2, r7 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __aeabi_lmul +; THUMBV6-NEXT: mov r6, r0 +; THUMBV6-NEXT: str r1, [sp, #56] @ 4-byte Spill +; THUMBV6-NEXT: ldr r0, [sp, #108] +; THUMBV6-NEXT: str r0, [sp, #76] @ 4-byte Spill +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: ldr r4, [sp, #32] @ 4-byte Reload +; THUMBV6-NEXT: mov r2, r4 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __aeabi_lmul +; THUMBV6-NEXT: str r1, [sp, #36] @ 4-byte Spill +; THUMBV6-NEXT: adds r6, r0, r6 +; THUMBV6-NEXT: mov r0, r7 +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: mov r2, r4 +; THUMBV6-NEXT: mov r3, r5 +; THUMBV6-NEXT: bl __aeabi_lmul +; THUMBV6-NEXT: adds r2, r1, r6 +; THUMBV6-NEXT: mov r1, r5 +; THUMBV6-NEXT: adcs r1, r5 +; THUMBV6-NEXT: ldr r3, [sp, #24] @ 4-byte Reload +; THUMBV6-NEXT: adds r0, r0, r3 +; THUMBV6-NEXT: ldr r3, [sp, #20] @ 4-byte 
Reload +; THUMBV6-NEXT: adcs r2, r3 +; THUMBV6-NEXT: ldr r3, [sp, #40] @ 4-byte Reload +; THUMBV6-NEXT: adds r0, r3, r0 +; THUMBV6-NEXT: ldr r3, [sp, #72] @ 4-byte Reload +; THUMBV6-NEXT: str r0, [r3, #8] +; THUMBV6-NEXT: ldr r0, [sp, #44] @ 4-byte Reload +; THUMBV6-NEXT: adcs r2, r0 +; THUMBV6-NEXT: str r2, [r3, #12] +; THUMBV6-NEXT: ldr r2, [sp, #28] @ 4-byte Reload +; THUMBV6-NEXT: adcs r5, r5 +; THUMBV6-NEXT: movs r0, #1 +; THUMBV6-NEXT: cmp r2, #0 +; THUMBV6-NEXT: mov r3, r0 +; THUMBV6-NEXT: bne .LBB0_2 +; THUMBV6: mov r3, r2 +; THUMBV6: ldr r2, [sp, #60] @ 4-byte Reload +; THUMBV6-NEXT: cmp r2, #0 +; THUMBV6-NEXT: mov r4, r0 +; THUMBV6-NEXT: bne .LBB0_4 +; THUMBV6: mov r4, r2 +; THUMBV6: ldr r2, [sp, #80] @ 4-byte Reload +; THUMBV6-NEXT: cmp r2, #0 +; THUMBV6-NEXT: mov r2, r0 +; THUMBV6-NEXT: bne .LBB0_6 +; THUMBV6: ldr r2, [sp, #80] @ 4-byte Reload +; THUMBV6: ands r2, r4 +; THUMBV6-NEXT: orrs r2, r3 +; THUMBV6-NEXT: ldr r4, [sp, #52] @ 4-byte Reload +; THUMBV6-NEXT: cmp r4, #0 +; THUMBV6-NEXT: mov r3, r0 +; THUMBV6-NEXT: bne .LBB0_8 +; THUMBV6: mov r3, r4 +; THUMBV6: orrs r2, r3 +; THUMBV6-NEXT: ldr r3, [sp, #48] @ 4-byte Reload +; THUMBV6-NEXT: orrs r2, r3 +; THUMBV6-NEXT: ldr r3, [sp, #36] @ 4-byte Reload +; THUMBV6-NEXT: cmp r3, #0 +; THUMBV6-NEXT: mov r4, r0 +; THUMBV6-NEXT: bne .LBB0_10 +; THUMBV6: mov r4, r3 +; THUMBV6: ldr r3, [sp, #68] @ 4-byte Reload +; THUMBV6-NEXT: cmp r3, #0 +; THUMBV6-NEXT: mov r6, r0 +; THUMBV6-NEXT: bne .LBB0_12 +; THUMBV6: mov r6, r3 +; THUMBV6: ldr r3, [sp, #76] @ 4-byte Reload +; THUMBV6-NEXT: cmp r3, #0 +; THUMBV6-NEXT: mov r3, r0 +; THUMBV6-NEXT: bne .LBB0_14 +; THUMBV6: ldr r3, [sp, #76] @ 4-byte Reload +; THUMBV6: ands r3, r6 +; THUMBV6-NEXT: orrs r3, r4 +; THUMBV6-NEXT: ldr r6, [sp, #56] @ 4-byte Reload +; THUMBV6-NEXT: cmp r6, #0 +; THUMBV6-NEXT: mov r4, r0 +; THUMBV6-NEXT: bne .LBB0_16 +; THUMBV6: mov r4, r6 +; THUMBV6: orrs r3, r4 +; THUMBV6-NEXT: orrs r3, r1 +; THUMBV6-NEXT: ldr r4, [sp, #64] @ 4-byte Reload +; 
THUMBV6-NEXT: ldr r1, [sp, #80] @ 4-byte Reload +; THUMBV6-NEXT: orrs r4, r1 +; THUMBV6-NEXT: cmp r4, #0 +; THUMBV6-NEXT: mov r1, r0 +; THUMBV6-NEXT: bne .LBB0_18 +; THUMBV6: mov r1, r4 +; THUMBV6: ldr r4, [sp, #76] @ 4-byte Reload +; THUMBV6-NEXT: orrs r7, r4 +; THUMBV6-NEXT: cmp r7, #0 +; THUMBV6-NEXT: mov r4, r0 +; THUMBV6-NEXT: bne .LBB0_20 +; THUMBV6: mov r4, r7 +; THUMBV6: ands r4, r1 +; THUMBV6-NEXT: orrs r4, r3 +; THUMBV6-NEXT: orrs r4, r2 +; THUMBV6-NEXT: orrs r4, r5 +; THUMBV6-NEXT: ands r4, r0 +; THUMBV6-NEXT: ldr r0, [sp, #72] @ 4-byte Reload +; THUMBV6-NEXT: strb r4, [r0, #16] +; THUMBV6-NEXT: add sp, #84 +; THUMBV6-NEXT: pop {r4, r5, r6, r7, pc} +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv7-unknown-none-gnueabi | FileCheck %s --check-prefixes=THUMBV7 + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; THUMBV7-LABEL: muloti_test: +; THUMBV7: @ %bb.0: @ %start +; THUMBV7-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; THUMBV7-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; THUMBV7-NEXT: .pad #44 +; 
THUMBV7-NEXT: sub sp, #44 +; THUMBV7-NEXT: str r0, [sp, #40] @ 4-byte Spill +; THUMBV7-NEXT: movs r0, #0 +; THUMBV7-NEXT: ldrd r4, r7, [sp, #88] +; THUMBV7-NEXT: mov r5, r3 +; THUMBV7-NEXT: strd r4, r7, [sp] +; THUMBV7-NEXT: mov r1, r3 +; THUMBV7-NEXT: strd r0, r0, [sp, #8] +; THUMBV7-NEXT: mov r6, r2 +; THUMBV7-NEXT: mov r0, r2 +; THUMBV7-NEXT: movs r2, #0 +; THUMBV7-NEXT: movs r3, #0 +; THUMBV7-NEXT: bl __multi3 +; THUMBV7-NEXT: strd r1, r0, [sp, #32] +; THUMBV7-NEXT: strd r3, r2, [sp, #24] +; THUMBV7-NEXT: ldrd r2, r0, [sp, #96] +; THUMBV7-NEXT: ldr.w r9, [sp, #80] +; THUMBV7-NEXT: umull lr, r0, r0, r6 +; THUMBV7-NEXT: ldr.w r11, [sp, #84] +; THUMBV7-NEXT: umull r3, r1, r5, r2 +; THUMBV7-NEXT: umull r2, r12, r2, r6 +; THUMBV7-NEXT: add r3, lr +; THUMBV7-NEXT: umull r8, r10, r7, r9 +; THUMBV7-NEXT: str r2, [sp, #20] @ 4-byte Spill +; THUMBV7-NEXT: adds.w lr, r12, r3 +; THUMBV7-NEXT: umull r6, r9, r9, r4 +; THUMBV7-NEXT: mov.w r3, #0 +; THUMBV7-NEXT: adc r12, r3, #0 +; THUMBV7-NEXT: umull r2, r4, r11, r4 +; THUMBV7-NEXT: add r2, r8 +; THUMBV7-NEXT: mov.w r8, #0 +; THUMBV7-NEXT: adds.w r2, r2, r9 +; THUMBV7-NEXT: adc r9, r3, #0 +; THUMBV7-NEXT: ldr r3, [sp, #20] @ 4-byte Reload +; THUMBV7-NEXT: adds r3, r3, r6 +; THUMBV7-NEXT: ldr r6, [sp, #28] @ 4-byte Reload +; THUMBV7-NEXT: adc.w r2, r2, lr +; THUMBV7-NEXT: adds r3, r3, r6 +; THUMBV7-NEXT: ldr r6, [sp, #24] @ 4-byte Reload +; THUMBV7-NEXT: adcs r2, r6 +; THUMBV7-NEXT: ldrd r6, lr, [sp, #36] +; THUMBV7-NEXT: str.w r6, [lr] +; THUMBV7-NEXT: adc r8, r8, #0 +; THUMBV7-NEXT: ldr r6, [sp, #32] @ 4-byte Reload +; THUMBV7-NEXT: cmp r5, #0 +; THUMBV7-NEXT: strd r6, r3, [lr, #4] +; THUMBV7-NEXT: str.w r2, [lr, #12] +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r5, #1 +; THUMBV7-NEXT: ldr r2, [sp, #100] +; THUMBV7-NEXT: cmp r2, #0 +; THUMBV7-NEXT: mov r3, r2 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r3, #1 +; THUMBV7-NEXT: cmp r0, #0 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r0, #1 +; THUMBV7-NEXT: cmp r1, #0 +; 
THUMBV7-NEXT: and.w r3, r3, r5 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r1, #1 +; THUMBV7-NEXT: orrs r0, r3 +; THUMBV7-NEXT: cmp r7, #0 +; THUMBV7-NEXT: orr.w r0, r0, r1 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r7, #1 +; THUMBV7-NEXT: cmp.w r11, #0 +; THUMBV7-NEXT: mov r1, r11 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r1, #1 +; THUMBV7-NEXT: cmp r4, #0 +; THUMBV7-NEXT: ldr r3, [sp, #96] +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r4, #1 +; THUMBV7-NEXT: cmp.w r10, #0 +; THUMBV7-NEXT: and.w r1, r1, r7 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne.w r10, #1 +; THUMBV7-NEXT: orrs r3, r2 +; THUMBV7-NEXT: ldr r2, [sp, #80] +; THUMBV7-NEXT: orr.w r1, r1, r4 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r3, #1 +; THUMBV7-NEXT: orr.w r1, r1, r10 +; THUMBV7-NEXT: orrs.w r7, r2, r11 +; THUMBV7-NEXT: orr.w r1, r1, r9 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r7, #1 +; THUMBV7-NEXT: orr.w r0, r0, r12 +; THUMBV7-NEXT: ands r3, r7 +; THUMBV7-NEXT: orrs r1, r3 +; THUMBV7-NEXT: orrs r0, r1 +; THUMBV7-NEXT: orr.w r0, r0, r8 +; THUMBV7-NEXT: and r0, r0, #1 +; THUMBV7-NEXT: strb.w r0, [lr, #16] +; THUMBV7-NEXT: add sp, #44 +; THUMBV7-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll +++ 
llvm/trunk/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=thumbv7-unknown-none-gnueabi | FileCheck %s --check-prefixes=THUMBV7 + +define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 { +; THUMBV7-LABEL: mulodi_test: +; THUMBV7: @ %bb.0: @ %start +; THUMBV7-NEXT: .save {r4, r5, r6, lr} +; THUMBV7-NEXT: push {r4, r5, r6, lr} +; THUMBV7-NEXT: umull r12, lr, r3, r0 +; THUMBV7-NEXT: movs r6, #0 +; THUMBV7-NEXT: umull r4, r5, r1, r2 +; THUMBV7-NEXT: umull r0, r2, r0, r2 +; THUMBV7-NEXT: add r4, r12 +; THUMBV7-NEXT: adds.w r12, r2, r4 +; THUMBV7-NEXT: adc r2, r6, #0 +; THUMBV7-NEXT: cmp r3, #0 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r3, #1 +; THUMBV7-NEXT: cmp r1, #0 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r1, #1 +; THUMBV7-NEXT: cmp r5, #0 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne r5, #1 +; THUMBV7-NEXT: ands r1, r3 +; THUMBV7-NEXT: cmp.w lr, #0 +; THUMBV7-NEXT: orr.w r1, r1, r5 +; THUMBV7-NEXT: it ne +; THUMBV7-NEXT: movne.w lr, #1 +; THUMBV7-NEXT: orr.w r1, r1, lr +; THUMBV7-NEXT: orrs r2, r1 +; THUMBV7-NEXT: mov r1, r12 +; THUMBV7-NEXT: pop {r4, r5, r6, pc} +start: + %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2 + %1 = extractvalue { i64, i1 } %0, 0 + %2 = extractvalue { i64, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i64, i8 } undef, i64 %1, 0 + %5 = insertvalue { i64, i8 } %4, i8 %3, 1 + ret { i64, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll +++ 
llvm/trunk/test/CodeGen/WebAssembly/umulo-128-legalisation-lowering.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s -mtriple=wasm32 | FileCheck %s --check-prefixes=WASM32 +; NOTE: did not compile on wasm64 at the time the test was created! + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; WASM32-LABEL: muloti_test +; WASM32: get_global $push18=, __stack_pointer@GLOBAL +; WASM32: i32.const $push19=, 48 +; WASM32: i32.sub $push40=, $pop18, $pop19 +; WASM32: tee_local $push39=, 5, $pop40 +; WASM32: set_global __stack_pointer@GLOBAL, $pop39 +; WASM32: get_local $push41=, 5 +; WASM32: i32.const $push22=, 32 +; WASM32: i32.add $push23=, $pop41, $pop22 +; WASM32: get_local $push43=, 1 +; WASM32: i64.const $push0=, 0 +; WASM32: get_local $push42=, 3 +; WASM32: i64.const $push38=, 0 +; WASM32: call __multi3@FUNCTION, $pop23, $pop43, $pop0, $pop42, $pop38 +; WASM32: get_local $push44=, 5 +; WASM32: i32.const $push24=, 16 +; WASM32: i32.add $push25=, $pop44, $pop24 +; WASM32: get_local $push46=, 4 +; WASM32: i64.const $push37=, 0 +; WASM32: get_local $push45=, 1 +; WASM32: i64.const $push36=, 0 +; WASM32: call __multi3@FUNCTION, $pop25, $pop46, $pop37, $pop45, $pop36 +; WASM32: get_local $push49=, 5 +; WASM32: get_local $push48=, 2 +; WASM32: i64.const $push35=, 0 +; WASM32: get_local $push47=, 3 +; WASM32: i64.const $push34=, 0 +; WASM32: call __multi3@FUNCTION, $pop49, $pop48, $pop35, $pop47, $pop34 +; WASM32: get_local $push51=, 0 +; WASM32: get_local $push50=, 5 +; WASM32: i64.load $push1=, 32($pop50) +; WASM32: i64.store 0($pop51), $pop1 +; WASM32: get_local $push55=, 0 +; WASM32: get_local $push52=, 5 +; WASM32: i32.const $push5=, 40 +; WASM32: i32.add $push6=, $pop52, $pop5 +; WASM32: i64.load $push33=, 0($pop6) +; WASM32: tee_local $push32=, 1, $pop33 +; WASM32: get_local $push53=, 5 +; WASM32: i64.load $push3=, 0($pop53) +; WASM32: get_local $push54=, 5 +; WASM32: i64.load $push2=, 16($pop54) +; WASM32: i64.add $push4=, $pop3, $pop2 +; WASM32: i64.add 
$push31=, $pop32, $pop4 +; WASM32: tee_local $push30=, 3, $pop31 +; WASM32: i64.store 8($pop55), $pop30 +; WASM32: get_local $push62=, 0 +; WASM32: get_local $push56=, 2 +; WASM32: i64.const $push29=, 0 +; WASM32: i64.ne $push8=, $pop56, $pop29 +; WASM32: get_local $push57=, 4 +; WASM32: i64.const $push28=, 0 +; WASM32: i64.ne $push7=, $pop57, $pop28 +; WASM32: i32.and $push9=, $pop8, $pop7 +; WASM32: get_local $push58=, 5 +; WASM32: i64.load $push10=, 8($pop58) +; WASM32: i64.const $push27=, 0 +; WASM32: i64.ne $push11=, $pop10, $pop27 +; WASM32: i32.or $push12=, $pop9, $pop11 +; WASM32: get_local $push59=, 5 +; WASM32: i64.load $push13=, 24($pop59) +; WASM32: i64.const $push26=, 0 +; WASM32: i64.ne $push14=, $pop13, $pop26 +; WASM32: i32.or $push15=, $pop12, $pop14 +; WASM32: get_local $push61=, 3 +; WASM32: get_local $push60=, 1 +; WASM32: i64.lt_u $push16=, $pop61, $pop60 +; WASM32: i32.or $push17=, $pop15, $pop16 +; WASM32: i32.store8 16($pop62), $pop17 +; WASM32: get_local $push63=, 5 +; WASM32: i32.const $push20=, 48 +; WASM32: i32.add $push21=, $pop63, $pop20 +; WASM32: set_global __stack_pointer@GLOBAL, $pop21 + +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/X86/muloti.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/muloti.ll +++ llvm/trunk/test/CodeGen/X86/muloti.ll @@ -32,50 +32,6 @@ ret %0 %tmp24 } -define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 
%b.coerce1) nounwind uwtable ssp { -entry: -; CHECK: foo - %retval = alloca i128, align 16 - %coerce = alloca i128, align 16 - %a.addr = alloca i128, align 16 - %coerce1 = alloca i128, align 16 - %b.addr = alloca i128, align 16 - %0 = bitcast i128* %coerce to %0* - %1 = getelementptr %0, %0* %0, i32 0, i32 0 - store i64 %a.coerce0, i64* %1 - %2 = getelementptr %0, %0* %0, i32 0, i32 1 - store i64 %a.coerce1, i64* %2 - %a = load i128, i128* %coerce, align 16 - store i128 %a, i128* %a.addr, align 16 - %3 = bitcast i128* %coerce1 to %0* - %4 = getelementptr %0, %0* %3, i32 0, i32 0 - store i64 %b.coerce0, i64* %4 - %5 = getelementptr %0, %0* %3, i32 0, i32 1 - store i64 %b.coerce1, i64* %5 - %b = load i128, i128* %coerce1, align 16 - store i128 %b, i128* %b.addr, align 16 - %tmp = load i128, i128* %a.addr, align 16 - %tmp2 = load i128, i128* %b.addr, align 16 - %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2) -; CHECK: cmov -; CHECK: divti3 - %7 = extractvalue %1 %6, 0 - %8 = extractvalue %1 %6, 1 - br i1 %8, label %overflow, label %nooverflow - -overflow: ; preds = %entry - call void @llvm.trap() - unreachable - -nooverflow: ; preds = %entry - store i128 %7, i128* %retval - %9 = bitcast i128* %retval to %0* - %10 = load %0, %0* %9, align 1 - ret %0 %10 -} - -declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone - declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone declare void @llvm.trap() nounwind Index: llvm/trunk/test/CodeGen/X86/select.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/select.ll +++ llvm/trunk/test/CodeGen/X86/select.ll @@ -53,6 +53,7 @@ ; GENERIC-NEXT: popq %rcx ; GENERIC-NEXT: retq ; GENERIC-NEXT: LBB1_1: ## %bb90 +; GENERIC-NEXT: ud2 ; ; ATOM-LABEL: test2: ; ATOM: ## %bb.0: ## %entry @@ -70,6 +71,7 @@ ; ATOM-NEXT: popq %rcx ; ATOM-NEXT: retq ; ATOM-NEXT: LBB1_1: ## %bb90 +; ATOM-NEXT: ud2 ; ; MCU-LABEL: test2: ; MCU: # %bb.0: # %entry @@ 
-636,71 +638,6 @@ ret i64 %cond } - -declare noalias i8* @_Znam(i64) noredzone - -define noalias i8* @test12(i64 %count) nounwind ssp noredzone { -; GENERIC-LABEL: test12: -; GENERIC: ## %bb.0: ## %entry -; GENERIC-NEXT: movl $4, %ecx -; GENERIC-NEXT: movq %rdi, %rax -; GENERIC-NEXT: mulq %rcx -; GENERIC-NEXT: movq $-1, %rdi -; GENERIC-NEXT: cmovnoq %rax, %rdi -; GENERIC-NEXT: jmp __Znam ## TAILCALL -; -; ATOM-LABEL: test12: -; ATOM: ## %bb.0: ## %entry -; ATOM-NEXT: movq %rdi, %rax -; ATOM-NEXT: movl $4, %ecx -; ATOM-NEXT: movq $-1, %rdi -; ATOM-NEXT: mulq %rcx -; ATOM-NEXT: cmovnoq %rax, %rdi -; ATOM-NEXT: jmp __Znam ## TAILCALL -; -; MCU-LABEL: test12: -; MCU: # %bb.0: # %entry -; MCU-NEXT: pushl %ebp -; MCU-NEXT: pushl %ebx -; MCU-NEXT: pushl %edi -; MCU-NEXT: pushl %esi -; MCU-NEXT: movl %edx, %ebx -; MCU-NEXT: movl %eax, %ebp -; MCU-NEXT: movl $4, %ecx -; MCU-NEXT: mull %ecx -; MCU-NEXT: movl %eax, %esi -; MCU-NEXT: leal (%edx,%ebx,4), %edi -; MCU-NEXT: movl %edi, %edx -; MCU-NEXT: pushl $0 -; MCU-NEXT: pushl $4 -; MCU-NEXT: calll __udivdi3 -; MCU-NEXT: addl $8, %esp -; MCU-NEXT: xorl %ebx, %edx -; MCU-NEXT: xorl %ebp, %eax -; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: movl $-1, %eax -; MCU-NEXT: movl $-1, %edx -; MCU-NEXT: jne .LBB14_2 -; MCU-NEXT: # %bb.1: # %entry -; MCU-NEXT: movl %esi, %eax -; MCU-NEXT: movl %edi, %edx -; MCU-NEXT: .LBB14_2: # %entry -; MCU-NEXT: popl %esi -; MCU-NEXT: popl %edi -; MCU-NEXT: popl %ebx -; MCU-NEXT: popl %ebp -; MCU-NEXT: jmp _Znam # TAILCALL -entry: - %A = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %count, i64 4) - %B = extractvalue { i64, i1 } %A, 1 - %C = extractvalue { i64, i1 } %A, 0 - %D = select i1 %B, i64 -1, i64 %C - %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone - ret i8* %call -} - -declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone - define i32 @test13(i32 %a, i32 %b) nounwind { ; GENERIC-LABEL: test13: ; GENERIC: ## %bb.0: @@ -862,10 +799,10 @@ ; MCU-LABEL: 
test18: ; MCU: # %bb.0: ; MCU-NEXT: cmpl $15, %eax -; MCU-NEXT: jl .LBB20_2 +; MCU-NEXT: jl .LBB19_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl %ecx, %edx -; MCU-NEXT: .LBB20_2: +; MCU-NEXT: .LBB19_2: ; MCU-NEXT: movl %edx, %eax ; MCU-NEXT: retl %cmp = icmp slt i32 %x, 15 @@ -902,10 +839,10 @@ ; GENERIC-NEXT: cmovlel %edi, %eax ; GENERIC-NEXT: cmpl $-128, %eax ; GENERIC-NEXT: movb $-128, %cl -; GENERIC-NEXT: jl LBB22_2 +; GENERIC-NEXT: jl LBB21_2 ; GENERIC-NEXT: ## %bb.1: ; GENERIC-NEXT: movl %eax, %ecx -; GENERIC-NEXT: LBB22_2: +; GENERIC-NEXT: LBB21_2: ; GENERIC-NEXT: movb %cl, (%rsi) ; GENERIC-NEXT: retq ; @@ -916,10 +853,10 @@ ; ATOM-NEXT: movb $-128, %cl ; ATOM-NEXT: cmovlel %edi, %eax ; ATOM-NEXT: cmpl $-128, %eax -; ATOM-NEXT: jl LBB22_2 +; ATOM-NEXT: jl LBB21_2 ; ATOM-NEXT: ## %bb.1: ; ATOM-NEXT: movl %eax, %ecx -; ATOM-NEXT: LBB22_2: +; ATOM-NEXT: LBB21_2: ; ATOM-NEXT: movb %cl, (%rsi) ; ATOM-NEXT: retq ; @@ -927,16 +864,16 @@ ; MCU: # %bb.0: ; MCU-NEXT: cmpl $127, %eax ; MCU-NEXT: movl $127, %ecx -; MCU-NEXT: jg .LBB22_2 +; MCU-NEXT: jg .LBB21_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl %eax, %ecx -; MCU-NEXT: .LBB22_2: +; MCU-NEXT: .LBB21_2: ; MCU-NEXT: cmpl $-128, %ecx ; MCU-NEXT: movb $-128, %al -; MCU-NEXT: jl .LBB22_4 +; MCU-NEXT: jl .LBB21_4 ; MCU-NEXT: # %bb.3: ; MCU-NEXT: movl %ecx, %eax -; MCU-NEXT: .LBB22_4: +; MCU-NEXT: .LBB21_4: ; MCU-NEXT: movb %al, (%edx) ; MCU-NEXT: retl %cmp = icmp sgt i32 %src, 127 @@ -976,16 +913,16 @@ ; MCU: # %bb.0: ; MCU-NEXT: cmpl $32767, %eax # imm = 0x7FFF ; MCU-NEXT: movl $32767, %ecx # imm = 0x7FFF -; MCU-NEXT: jg .LBB23_2 +; MCU-NEXT: jg .LBB22_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: movl %eax, %ecx -; MCU-NEXT: .LBB23_2: +; MCU-NEXT: .LBB22_2: ; MCU-NEXT: cmpl $-32768, %ecx # imm = 0x8000 ; MCU-NEXT: movl $32768, %eax # imm = 0x8000 -; MCU-NEXT: jl .LBB23_4 +; MCU-NEXT: jl .LBB22_4 ; MCU-NEXT: # %bb.3: ; MCU-NEXT: movl %ecx, %eax -; MCU-NEXT: .LBB23_4: +; MCU-NEXT: .LBB22_4: ; MCU-NEXT: movw %ax, (%edx) ; MCU-NEXT: retl %cmp 
= icmp sgt i32 %src, 32767 @@ -1009,19 +946,19 @@ ; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB24_1: ## %CF +; CHECK-NEXT: LBB23_1: ## %CF ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB24_1 +; CHECK-NEXT: jne LBB23_1 ; CHECK-NEXT: ## %bb.2: ## %CF250 -; CHECK-NEXT: ## in Loop: Header=BB24_1 Depth=1 -; CHECK-NEXT: jne LBB24_1 +; CHECK-NEXT: ## in Loop: Header=BB23_1 Depth=1 +; CHECK-NEXT: jne LBB23_1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: LBB24_3: ## %CF242 +; CHECK-NEXT: LBB23_3: ## %CF242 ; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmpl %eax, %eax ; CHECK-NEXT: ucomiss %xmm0, %xmm0 -; CHECK-NEXT: jp LBB24_3 +; CHECK-NEXT: jp LBB23_3 ; CHECK-NEXT: ## %bb.4: ## %CF244 ; CHECK-NEXT: retq ; @@ -1030,24 +967,24 @@ ; MCU-NEXT: movl $-1, %ecx ; MCU-NEXT: movb $1, %al ; MCU-NEXT: .p2align 4, 0x90 -; MCU-NEXT: .LBB24_1: # %CF +; MCU-NEXT: .LBB23_1: # %CF ; MCU-NEXT: # =>This Inner Loop Header: Depth=1 ; MCU-NEXT: testb %al, %al -; MCU-NEXT: jne .LBB24_1 +; MCU-NEXT: jne .LBB23_1 ; MCU-NEXT: # %bb.2: # %CF250 -; MCU-NEXT: # in Loop: Header=BB24_1 Depth=1 -; MCU-NEXT: jne .LBB24_1 +; MCU-NEXT: # in Loop: Header=BB23_1 Depth=1 +; MCU-NEXT: jne .LBB23_1 ; MCU-NEXT: # %bb.3: # %CF242.preheader ; MCU-NEXT: fldz ; MCU-NEXT: .p2align 4, 0x90 -; MCU-NEXT: .LBB24_4: # %CF242 +; MCU-NEXT: .LBB23_4: # %CF242 ; MCU-NEXT: # =>This Inner Loop Header: Depth=1 ; MCU-NEXT: cmpl %eax, %ecx ; MCU-NEXT: fucom %st(0) ; MCU-NEXT: fnstsw %ax ; MCU-NEXT: # kill: def $ah killed $ah killed $ax ; MCU-NEXT: sahf -; MCU-NEXT: jp .LBB24_4 +; MCU-NEXT: jp .LBB23_4 ; MCU-NEXT: # %bb.5: # %CF244 ; MCU-NEXT: fstp %st(0) ; MCU-NEXT: retl @@ -1116,10 +1053,10 @@ ; MCU-LABEL: select_xor_1b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %dl -; MCU-NEXT: je .LBB26_2 +; MCU-NEXT: je .LBB25_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: xorl $43, %eax -; MCU-NEXT: .LBB26_2: # %entry 
+; MCU-NEXT: .LBB25_2: # %entry ; MCU-NEXT: # kill: def $ax killed $ax killed $eax ; MCU-NEXT: retl entry: @@ -1168,10 +1105,10 @@ ; MCU-LABEL: select_xor_2b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: je .LBB28_2 +; MCU-NEXT: je .LBB27_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: xorl %edx, %eax -; MCU-NEXT: .LBB28_2: # %entry +; MCU-NEXT: .LBB27_2: # %entry ; MCU-NEXT: retl entry: %and = and i8 %cond, 1 @@ -1219,10 +1156,10 @@ ; MCU-LABEL: select_or_b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: je .LBB30_2 +; MCU-NEXT: je .LBB29_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: .LBB30_2: # %entry +; MCU-NEXT: .LBB29_2: # %entry ; MCU-NEXT: retl entry: %and = and i8 %cond, 1 @@ -1270,10 +1207,10 @@ ; MCU-LABEL: select_or_1b: ; MCU: # %bb.0: # %entry ; MCU-NEXT: testb $1, %cl -; MCU-NEXT: je .LBB32_2 +; MCU-NEXT: je .LBB31_2 ; MCU-NEXT: # %bb.1: ; MCU-NEXT: orl %edx, %eax -; MCU-NEXT: .LBB32_2: # %entry +; MCU-NEXT: .LBB31_2: # %entry ; MCU-NEXT: retl entry: %and = and i32 %cond, 1 Index: llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -0,0 +1,196 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefixes=X64 +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=X86 + +define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { +; X64-LABEL: muloti_test: +; X64: # %bb.0: # %start +; X64-NEXT: movq %rdx, %r8 +; X64-NEXT: testq %rcx, %rcx +; X64-NEXT: setne %al +; X64-NEXT: testq %rsi, %rsi +; X64-NEXT: setne %r9b +; X64-NEXT: andb %al, %r9b +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: mulq %rdx +; X64-NEXT: movq %rax, %rsi +; X64-NEXT: seto %r10b +; X64-NEXT: movq %rcx, 
%rax +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: seto %r11b +; X64-NEXT: orb %r10b, %r11b +; X64-NEXT: addq %rsi, %rcx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: mulq %r8 +; X64-NEXT: addq %rcx, %rdx +; X64-NEXT: setb %cl +; X64-NEXT: orb %r11b, %cl +; X64-NEXT: orb %r9b, %cl +; X64-NEXT: retq +; +; X86-LABEL: muloti_test: +; X86: # %bb.0: # %start +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: subl $28, %esp +; X86-NEXT: .cfi_def_cfa_offset 48 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %edi +; X86-NEXT: movl %eax, %esi +; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: addl %ecx, %esi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: addl %esi, %ecx +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %eax, %edi +; X86-NEXT: seto {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ebx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: seto 
{{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: addl %edi, %esi +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %ebp, %ebx +; X86-NEXT: movl %edx, %edi +; X86-NEXT: addl %esi, %edi +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl %ebx, %ecx +; X86-NEXT: mull %ebx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ecx +; X86-NEXT: movl %edx, %esi +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addl %ebx, %ecx +; X86-NEXT: adcl $0, %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: addl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: adcl %esi, %ebx +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl %eax, %esi +; X86-NEXT: addl %ebx, %esi +; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; X86-NEXT: adcl %eax, %ecx +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: mull %edx +; X86-NEXT: movl %edx, %ebp +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: mull %edx +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ebp, %edx +; X86-NEXT: addl %esi, %eax +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl %edi, %edx +; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-NEXT: setne %cl +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: testl %esi, %esi +; X86-NEXT: setne %ch +; X86-NEXT: andb %cl, %ch +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: orb %ch, %cl +; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-NEXT: setne %cl +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: testl %edi, %edi +; X86-NEXT: setne %bh +; X86-NEXT: andb %cl, %bh +; X86-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload +; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; X86-NEXT: orl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: setne %bl +; X86-NEXT: orl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, (%ecx) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: movl %esi, 4(%ecx) +; X86-NEXT: movl %eax, 8(%ecx) +; X86-NEXT: movl %edx, 12(%ecx) +; X86-NEXT: setne %al +; X86-NEXT: andb %bl, %al +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload +; X86-NEXT: orb %bh, %al +; X86-NEXT: andb $1, %al +; X86-NEXT: movb %al, 16(%ecx) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: addl $28, %esp +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa_offset 4 +; 
X86-NEXT: retl $4 +start: + %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 + %1 = extractvalue { i128, i1 } %0, 0 + %2 = extractvalue { i128, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i128, i8 } undef, i128 %1, 0 + %5 = insertvalue { i128, i8 } %4, i8 %3, 1 + ret { i128, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/X86/umulo-64-legalisation-lowering.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/umulo-64-legalisation-lowering.ll +++ llvm/trunk/test/CodeGen/X86/umulo-64-legalisation-lowering.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s --check-prefixes=X86 + +define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 { +; X86-LABEL: mulodi_test: +; X86: # %bb.0: # %start +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: testl %esi, %esi +; X86-NEXT: setne %dl +; X86-NEXT: testl %eax, %eax +; X86-NEXT: setne %bl +; X86-NEXT: andb %dl, %bl +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, %edi +; X86-NEXT: seto %cl +; X86-NEXT: movl %esi, %eax +; X86-NEXT: mull %ebp +; X86-NEXT: movl %eax, %esi +; X86-NEXT: seto %ch +; X86-NEXT: orb %cl, %ch 
+; X86-NEXT: addl %edi, %esi +; X86-NEXT: movl %ebp, %eax +; X86-NEXT: mull {{[0-9]+}}(%esp) +; X86-NEXT: addl %esi, %edx +; X86-NEXT: setb %cl +; X86-NEXT: orb %ch, %cl +; X86-NEXT: orb %bl, %cl +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +start: + %0 = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %l, i64 %r) #2 + %1 = extractvalue { i64, i1 } %0, 0 + %2 = extractvalue { i64, i1 } %0, 1 + %3 = zext i1 %2 to i8 + %4 = insertvalue { i64, i8 } undef, i64 %1, 0 + %5 = insertvalue { i64, i8 } %4, i8 %3, 1 + ret { i64, i8 } %5 +} + +; Function Attrs: nounwind readnone speculatable +declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #1 + +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind readnone speculatable } +attributes #2 = { nounwind }