diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -603,6 +603,12 @@
   setOperationAction(ISD::UMULO, MVT::i32, Custom);
   setOperationAction(ISD::UMULO, MVT::i64, Custom);
 
+  // The MULO libcall is not part of libgcc, only compiler-rt.
+  //
+  // TODO: Link compiler-rt's builtins by default in Clang once and for all
+  // See: https://github.com/llvm/llvm-project/issues/16778
+  setLibcallName(RTLIB::MULO_I128, nullptr);
+
   setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
   setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
   setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -355,15 +355,41 @@
 define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_checked_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    add x4, sp, #8
-; CHECK-NEXT:    bl __muloti4
-; CHECK-NEXT:    ldr x8, [sp, #8]
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    asr x8, x1, #63
+; CHECK-NEXT:    asr x9, x3, #63
+; CHECK-NEXT:    mul x10, x2, x8
+; CHECK-NEXT:    umulh x11, x2, x8
+; CHECK-NEXT:    umulh x12, x9, x0
+; CHECK-NEXT:    mul x13, x9, x0
+; CHECK-NEXT:    add x11, x11, x10
+; CHECK-NEXT:    madd x9, x9, x1, x12
+; CHECK-NEXT:    madd x8, x3, x8, x11
+; CHECK-NEXT:    adds x10, x13, x10
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    add x9, x9, x13
+; CHECK-NEXT:    mul x13, x1, x2
+; CHECK-NEXT:    adc x9, x9, x8
+; CHECK-NEXT:    umulh x12, x1, x2
+; CHECK-NEXT:    mul x8, x0, x3
+; CHECK-NEXT:    adds x11, x13, x11
+; CHECK-NEXT:    umulh x13, x0, x3
+; CHECK-NEXT:    cinc x12, x12, hs
+; CHECK-NEXT:    adds x8, x8, x11
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cinc x11, x13, hs
+; CHECK-NEXT:    mul x13, x1, x3
+; CHECK-NEXT:    adds x11, x12, x11
+; CHECK-NEXT:    umulh x12, x1, x3
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x13, x11
+; CHECK-NEXT:    adc x12, x12, x14
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    adc x9, x12, x9
+; CHECK-NEXT:    cmp x10, x11
+; CHECK-NEXT:    mov x1, x8
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
 ; CHECK-NEXT:    cset w2, eq
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -378,15 +404,41 @@
 define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_overflowing_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    .cfi_offset w30, -16
-; CHECK-NEXT:    add x4, sp, #8
-; CHECK-NEXT:    bl __muloti4
-; CHECK-NEXT:    ldr x8, [sp, #8]
-; CHECK-NEXT:    cmp x8, #0
+; CHECK-NEXT:    asr x8, x1, #63
+; CHECK-NEXT:    asr x9, x3, #63
+; CHECK-NEXT:    mul x10, x2, x8
+; CHECK-NEXT:    umulh x11, x2, x8
+; CHECK-NEXT:    umulh x12, x9, x0
+; CHECK-NEXT:    mul x13, x9, x0
+; CHECK-NEXT:    add x11, x11, x10
+; CHECK-NEXT:    madd x9, x9, x1, x12
+; CHECK-NEXT:    madd x8, x3, x8, x11
+; CHECK-NEXT:    adds x10, x13, x10
+; CHECK-NEXT:    umulh x11, x0, x2
+; CHECK-NEXT:    add x9, x9, x13
+; CHECK-NEXT:    mul x13, x1, x2
+; CHECK-NEXT:    adc x9, x9, x8
+; CHECK-NEXT:    umulh x12, x1, x2
+; CHECK-NEXT:    mul x8, x0, x3
+; CHECK-NEXT:    adds x11, x13, x11
+; CHECK-NEXT:    umulh x13, x0, x3
+; CHECK-NEXT:    cinc x12, x12, hs
+; CHECK-NEXT:    adds x8, x8, x11
+; CHECK-NEXT:    mul x0, x0, x2
+; CHECK-NEXT:    cinc x11, x13, hs
+; CHECK-NEXT:    mul x13, x1, x3
+; CHECK-NEXT:    adds x11, x12, x11
+; CHECK-NEXT:    umulh x12, x1, x3
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x13, x11
+; CHECK-NEXT:    adc x12, x12, x14
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    asr x11, x8, #63
+; CHECK-NEXT:    adc x9, x12, x9
+; CHECK-NEXT:    cmp x10, x11
+; CHECK-NEXT:    mov x1, x8
+; CHECK-NEXT:    ccmp x9, x11, #0, eq
 ; CHECK-NEXT:    cset w2, ne
-; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
@@ -400,26 +452,46 @@
 define i128 @i128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: i128_saturating_mul:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT:    .cfi_offset w19, -8
-; CHECK-NEXT:    .cfi_offset w20, -16
-; CHECK-NEXT:    .cfi_offset w30, -32
-; CHECK-NEXT:    add x4, sp, #8
-; CHECK-NEXT:    mov x19, x3
-; CHECK-NEXT:    mov x20, x1
-; CHECK-NEXT:    str xzr, [sp, #8]
-; CHECK-NEXT:    bl __muloti4
-; CHECK-NEXT:    ldr x8, [sp, #8]
-; CHECK-NEXT:    eor x9, x19, x20
-; CHECK-NEXT:    ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT:    asr x9, x9, #63
-; CHECK-NEXT:    eor x10, x9, #0x7fffffffffffffff
+; CHECK-NEXT:    asr x8, x1, #63
+; CHECK-NEXT:    asr x9, x3, #63
+; CHECK-NEXT:    umulh x14, x1, x2
+; CHECK-NEXT:    mul x10, x2, x8
+; CHECK-NEXT:    umulh x11, x2, x8
+; CHECK-NEXT:    umulh x12, x9, x0
+; CHECK-NEXT:    mul x13, x9, x0
+; CHECK-NEXT:    add x11, x11, x10
+; CHECK-NEXT:    madd x9, x9, x1, x12
+; CHECK-NEXT:    madd x8, x3, x8, x11
+; CHECK-NEXT:    adds x10, x13, x10
+; CHECK-NEXT:    umulh x12, x0, x2
+; CHECK-NEXT:    add x9, x9, x13
+; CHECK-NEXT:    mul x11, x1, x2
+; CHECK-NEXT:    adc x8, x9, x8
+; CHECK-NEXT:    mul x9, x0, x3
+; CHECK-NEXT:    adds x11, x11, x12
+; CHECK-NEXT:    umulh x12, x0, x3
+; CHECK-NEXT:    cinc x13, x14, hs
+; CHECK-NEXT:    adds x9, x9, x11
+; CHECK-NEXT:    cinc x11, x12, hs
+; CHECK-NEXT:    mul x12, x1, x3
+; CHECK-NEXT:    adds x11, x13, x11
+; CHECK-NEXT:    umulh x13, x1, x3
+; CHECK-NEXT:    cset w14, hs
+; CHECK-NEXT:    adds x11, x12, x11
+; CHECK-NEXT:    adc x12, x13, x14
+; CHECK-NEXT:    adds x10, x11, x10
+; CHECK-NEXT:    adc x8, x12, x8
+; CHECK-NEXT:    asr x11, x9, #63
+; CHECK-NEXT:    eor x8, x8, x11
+; CHECK-NEXT:    eor x10, x10, x11
+; CHECK-NEXT:    orr x8, x10, x8
+; CHECK-NEXT:    eor x10, x3, x1
+; CHECK-NEXT:    mul x11, x0, x2
+; CHECK-NEXT:    asr x10, x10, #63
+; CHECK-NEXT:    eor x12, x10, #0x7fffffffffffffff
 ; CHECK-NEXT:    cmp x8, #0
-; CHECK-NEXT:    csinv x0, x0, x9, eq
-; CHECK-NEXT:    csel x1, x10, x1, ne
-; CHECK-NEXT:    ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT:    csinv x0, x11, x10, eq
+; CHECK-NEXT:    csel x1, x12, x9, ne
 ; CHECK-NEXT:    ret
   %1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
   %2 = extractvalue { i128, i1 } %1, 0
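
Note: for reference, a minimal IR reproducer for the new lowering (illustrative only, not part of the patch; the function name is made up), run with llc -mtriple=aarch64. Before this change the i128 overflow intrinsic was lowered to a call to __muloti4; with RTLIB::MULO_I128 cleared it expands to the inline mul/umulh/adds sequence checked in the tests above:

; Illustrative reproducer, not taken from the patch.
declare { i128, i1 } @llvm.smul.with.overflow.i128(i128, i128)

; Returns the 128-bit product plus an overflow flag; previously this
; emitted "bl __muloti4" on AArch64.
define { i128, i1 } @smul_overflow(i128 %x, i128 %y) {
  %r = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
  ret { i128, i1 } %r
}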