diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -4428,7 +4428,10 @@ else if (VT == MVT::i128) LC = RTLIB::MULO_I128; - if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // If we don't have the libcall or if the function we are compiling is the + // implementation of the expected libcall (avoid inf-loop), expand inline. + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) || + TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) { // FIXME: This is not an optimal expansion, but better than crashing. EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -30,37 +30,62 @@ ret { i128, i8 } %5 } -define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) #4 { +; PR56403 +; We avoid lowering the intrinsic as a libcall because this function has the same name as +; the libcall we wanted to generate (that would create an infinite loop). + +define i128 @__muloti4(i128 %0, i128 %1, i32* nocapture nonnull writeonly align 4 %2) #2 { ; AARCH-LABEL: __muloti4: ; AARCH: // %bb.0: // %Entry -; AARCH-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill -; AARCH-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; AARCH-NEXT: mov x19, x4 +; AARCH-NEXT: asr x9, x1, #63 +; AARCH-NEXT: asr x10, x3, #63 +; AARCH-NEXT: umulh x14, x0, x2 +; AARCH-NEXT: mov x8, x1 +; AARCH-NEXT: mul x11, x2, x9 ; AARCH-NEXT: str wzr, [x4] -; AARCH-NEXT: add x4, sp, #8 -; AARCH-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; AARCH-NEXT: mov x21, x3 -; AARCH-NEXT: mov x20, x2 -; AARCH-NEXT: mov x22, x1 -; AARCH-NEXT: str xzr, [sp, #8] -; AARCH-NEXT: bl __muloti4 -; AARCH-NEXT: ldr x8, [sp, #8] -; AARCH-NEXT: cmp x8, #0 -; AARCH-NEXT: cset w8, ne -; AARCH-NEXT: tbz x22, #63, .LBB1_2 +; AARCH-NEXT: umulh x12, x10, x0 +; AARCH-NEXT: umulh x13, x2, x9 +; AARCH-NEXT: madd x12, x10, x1, x12 +; AARCH-NEXT: add x13, x13, x11 +; AARCH-NEXT: mul x10, x10, x0 +; AARCH-NEXT: madd x9, x3, x9, x13 +; AARCH-NEXT: add x12, x12, x10 +; AARCH-NEXT: adds x10, x10, x11 +; AARCH-NEXT: mul x11, x1, x2 +; AARCH-NEXT: adc x9, x12, x9 +; AARCH-NEXT: umulh x13, x1, x2 +; AARCH-NEXT: mul x12, x0, x3 +; AARCH-NEXT: adds x11, x11, x14 +; AARCH-NEXT: umulh x14, x0, x3 +; AARCH-NEXT: cinc x13, x13, hs +; AARCH-NEXT: adds x1, x12, x11 +; AARCH-NEXT: mul x12, x8, x3 +; AARCH-NEXT: cinc x11, x14, hs +; AARCH-NEXT: mul x0, x0, x2 +; AARCH-NEXT: adds x11, x13, x11 +; AARCH-NEXT: umulh x13, x8, x3 +; AARCH-NEXT: cset w14, hs +; AARCH-NEXT: adds x11, x12, x11 +; AARCH-NEXT: adc x12, x13, x14 +; AARCH-NEXT: adds x10, x11, x10 +; AARCH-NEXT: adc x9, x12, x9 +; AARCH-NEXT: asr x11, x1, #63 +; AARCH-NEXT: eor x9, x9, x11 +; AARCH-NEXT: eor x10, x10, x11 +; AARCH-NEXT: orr x9, x10, x9 +; AARCH-NEXT: cmp x9, #0 +; AARCH-NEXT: cset w9, ne +; AARCH-NEXT: tbz x8, #63, .LBB1_2 ; AARCH-NEXT: // %bb.1: // %Entry -; AARCH-NEXT: eor x9, x21, #0x8000000000000000 -; AARCH-NEXT: orr x9, x20, x9 -; AARCH-NEXT: cbz x9, .LBB1_3 +; AARCH-NEXT: eor x8, x3, #0x8000000000000000 +; AARCH-NEXT: orr x8, x2, x8 +; AARCH-NEXT: cbz x8, .LBB1_3 ; AARCH-NEXT: .LBB1_2: // %Else2 -; AARCH-NEXT: cbz w8, .LBB1_4 +; AARCH-NEXT: cbz w9, .LBB1_4 ; AARCH-NEXT: .LBB1_3: // %Then7 ; AARCH-NEXT: mov w8, #1 -; AARCH-NEXT: str w8, [x19] +; AARCH-NEXT: str w8, [x4] ; AARCH-NEXT: .LBB1_4: // %Block9 -; AARCH-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; AARCH-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; AARCH-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload ; AARCH-NEXT: ret Entry: store i32 0, i32* %2, align 4 @@ -90,4 +115,3 @@ attributes #0 = { nounwind readnone uwtable } attributes #1 = { nounwind readnone speculatable } attributes #2 = { nounwind } -attributes #4 = { nounwind mustprogress nobuiltin }