Index: llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4061,6 +4061,13 @@
   } else
     return nullptr;
 
+  // Bail out if the type is not desirable. The intrinsic might be lowered as a
+  // function call if the target can't handle the type. We allow the transform
+  // for vectors under the assumption that those will always be expanded inline.
+  Type *Ty = X->getType();
+  if (Ty->isIntegerTy() && !isDesirableIntType(Ty->getPrimitiveSizeInBits()))
+    return nullptr;
+
   BuilderTy::InsertPointGuard Guard(Builder);
   // If the pattern included (x * y), we'll want to insert new instructions
   // right before that original multiplication so that we can replace it.
@@ -4072,7 +4079,7 @@
       Div->getOpcode() == Instruction::UDiv
           ? Intrinsic::umul_with_overflow
           : Intrinsic::smul_with_overflow,
-      X->getType());
+      Ty);
   CallInst *Call = Builder.CreateCall(F, {X, Y}, "mul");
 
   // If the multiplication was used elsewhere, to ensure that we don't leave
Index: llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll
===================================================================
--- llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll
+++ llvm/test/Transforms/PhaseOrdering/AArch64/mul-ov.ll
@@ -1,28 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes="default" -S < %s | FileCheck %s
 
+; PR56403
+; We do not want to form a mul-with-overflow intrinsic here because
+; that could be lowered to a runtime libcall: __muloti4!
+
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-unknown"
 
 define i128 @__muloti4(i128 %0, i128 %1, i32* nonnull align 4 %2) {
 ; CHECK-LABEL: @__muloti4(
 ; CHECK-NEXT:  Entry:
-; CHECK-NEXT:    [[DOTFR:%.*]] = freeze i128 [[TMP1:%.*]]
 ; CHECK-NEXT:    store i32 0, i32* [[TMP2:%.*]], align 4
-; CHECK-NEXT:    [[MUL:%.*]] = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 [[TMP0:%.*]], i128 [[DOTFR]])
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i128 [[TMP0]], 0
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i128 [[DOTFR]], -170141183460469231731687303715884105728
-; CHECK-NEXT:    [[TMP5:%.*]] = and i1 [[TMP3]], [[TMP4]]
-; CHECK-NEXT:    br i1 [[TMP5]], label [[THEN7:%.*]], label [[ELSE2:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i128 [[TMP1:%.*]], [[TMP0:%.*]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt i128 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i128 [[TMP1]], -170141183460469231731687303715884105728
+; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP4]], i1 [[TMP5]], i1 false
+; CHECK-NEXT:    br i1 [[TMP6]], label [[THEN7:%.*]], label [[ELSE2:%.*]]
 ; CHECK:       Else2:
-; CHECK-NEXT:    [[MUL_OV:%.*]] = extractvalue { i128, i1 } [[MUL]], 1
-; CHECK-NEXT:    br i1 [[MUL_OV]], label [[THEN7]], label [[BLOCK9:%.*]]
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i128 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[DOTNOT]], label [[BLOCK9:%.*]], label [[THEN3:%.*]]
+; CHECK:       Then3:
+; CHECK-NEXT:    [[TMP7:%.*]] = sdiv i128 [[TMP3]], [[TMP0]]
+; CHECK-NEXT:    [[DOTNOT3:%.*]] = icmp eq i128 [[TMP7]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[DOTNOT3]], label [[BLOCK9]], label [[THEN7]]
 ; CHECK:       Then7:
 ; CHECK-NEXT:    store i32 1, i32* [[TMP2]], align 4
 ; CHECK-NEXT:    br label [[BLOCK9]]
 ; CHECK:       Block9:
-; CHECK-NEXT:    [[MUL_VAL:%.*]] = extractvalue { i128, i1 } [[MUL]], 0
-; CHECK-NEXT:    ret i128 [[MUL_VAL]]
+; CHECK-NEXT:    ret i128 [[TMP3]]
 ;
 Entry:
   %3 = alloca i128, align 16
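For context: PR56403 is about this transform firing inside compiler-rt's own `__muloti4`. Forming `@llvm.smul.with.overflow.i128` there is fatal, because targets like AArch64 lower the i128 intrinsic to a `__muloti4` libcall, so the runtime routine would end up calling itself. Below is a hedged C sketch of the divide-back overflow-check pattern the test exercises; the function name `mulo_sketch`, the `main` driver, and the sample values are illustrative assumptions, not the actual compiler-rt sources.

```c
#include <stdio.h>

/* Sketch of the divide-back overflow check that InstCombine matches.
 * Before this patch, the multiply plus the `res / a != b` comparison could
 * be folded into @llvm.smul.with.overflow.i128, which AArch64 lowers to a
 * __muloti4 libcall -- a self-call if this function *is* __muloti4. */
static __int128 mulo_sketch(__int128 a, __int128 b, int *overflow) {
  /* INT128_MIN; computed via unsigned shift to avoid signed-shift UB. */
  const __int128 min128 = (__int128)((unsigned __int128)1 << 127);
  *overflow = 0;
  /* Multiply through unsigned to sidestep signed-overflow UB at the C
   * level; the (possibly wrapped) product is still the value returned. */
  __int128 res = (__int128)((unsigned __int128)b * (unsigned __int128)a);
  if (a < 0 && b == min128) {
    *overflow = 1; /* always overflows, and res / a could trap (min / -1) */
  } else if (a != 0 && res / a != b) {
    *overflow = 1; /* divide-back check: the product wrapped */
  }
  return res;
}

int main(void) {
  int ov;
  __int128 r = mulo_sketch((__int128)1 << 100, 1 << 30, &ov);
  (void)r;
  printf("overflow = %d\n", ov); /* 2^130 does not fit in i128: prints 1 */
  return 0;
}
```

With the patch applied, `isDesirableIntType` rejects i128 on this target, so `opt -passes=default` keeps the plain `mul`/`sdiv` sequence in the test instead of introducing the intrinsic, exactly as the updated CHECK lines assert.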