diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3525,6 +3525,50 @@
                         Constant::getNullValue(WidestTy));
 }
 
+/// Fold
+///   (-1 u/ x) u< y
+/// to
+///   @llvm.umul.with.overflow(x, y) plus extraction of overflow bit
+/// Note that the comparison is commutative, while inverted (u>=) predicate
+/// will mean that we are looking for the opposite answer.
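+/// (Soundness: for non-zero x, unsigned x * y wraps exactly when
+/// y u> (-1 u/ x), i.e. when (-1 u/ x) u< y; for x == 0 the original udiv
+/// is UB, so replacing it is a legal refinement.)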
+static Value *
+foldUnsignedMultiplicationOverflowCheck(ICmpInst &I,
+                                        InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate Pred;
+  Value *X, *Y;
+  bool NeedNegation;
+  // Look for: (-1 u/ x) u</>= y
+  if (!I.isEquality() &&
+      match(&I, m_c_ICmp(Pred, m_OneUse(m_UDiv(m_AllOnes(), m_Value(X))),
+                         m_Value(Y)))) {
+    // Canonicalize as-if y was on RHS.
+    if (I.getOperand(1) != Y)
+      Pred = I.getSwappedPredicate();
+
+    // Are we checking that overflow does not happen, or does happen?
+    switch (Pred) {
+    case ICmpInst::Predicate::ICMP_ULT:
+      NeedNegation = false;
+      break; // OK
+    case ICmpInst::Predicate::ICMP_UGE:
+      NeedNegation = true;
+      break; // OK
+    default:
+      return nullptr; // Wrong predicate.
+    }
+  } else
+    return nullptr;
+
+  Function *F = Intrinsic::getDeclaration(
+      I.getModule(), Intrinsic::umul_with_overflow, X->getType());
+  CallInst *Call = Builder.CreateCall(F, {X, Y}, "umul");
+  Value *Res = Builder.CreateExtractValue(Call, 1, "umul.ov");
+  if (NeedNegation) // This technically increases instruction count.
+    Res = Builder.CreateNot(Res, "umul.not.ov");
+
+  return Res;
+}
+
 /// Try to fold icmp (binop), X or icmp X, (binop).
 /// TODO: A large part of this logic is duplicated in InstSimplify's
 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -3874,6 +3918,9 @@
     }
   }
 
+  if (Value *V = foldUnsignedMultiplicationOverflowCheck(I, Builder))
+    return replaceInstUsesWith(I, V);
+
   if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
     return replaceInstUsesWith(I, V);
 
diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll
--- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll
@@ -8,9 +8,10 @@
 
 define i1 @t0_basic(i8 %x, i8 %y) {
 ; CHECK-LABEL: @t0_basic(
-; CHECK-NEXT:    [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[T0]], [[Y:%.*]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
+; CHECK-NEXT:    [[UMUL_NOT_OV:%.*]] = xor i1 [[UMUL_OV]], true
+; CHECK-NEXT:    ret i1 [[UMUL_NOT_OV]]
 ;
   %t0 = udiv i8 -1, %x
   %r = icmp uge i8 %t0, %y
@@ -19,9 +20,10 @@
 
 define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = udiv <2 x i8> <i8 -1, i8 -1>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp uge <2 x i8> [[T0]], [[Y:%.*]]
-; CHECK-NEXT:    ret <2 x i1> [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[UMUL]], 1
+; CHECK-NEXT:    [[UMUL_NOT_OV:%.*]] = xor <2 x i1> [[UMUL_OV]], <i1 true, i1 true>
+; CHECK-NEXT:    ret <2 x i1> [[UMUL_NOT_OV]]
 ;
   %t0 = udiv <2 x i8> <i8 -1, i8 -1>, %x
   %r = icmp uge <2 x i8> %t0, %y
@@ -30,9 +32,10 @@
 
 define <3 x i1> @t2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @t2_vec_undef(
-; CHECK-NEXT:    [[T0:%.*]] = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp uge <3 x i8> [[T0]], [[Y:%.*]]
-; CHECK-NEXT:    ret <3 x i1> [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[UMUL]], 1
+; CHECK-NEXT:    [[UMUL_NOT_OV:%.*]] = xor <3 x i1> [[UMUL_OV]], <i1 true, i1 true, i1 true>
+; CHECK-NEXT:    ret <3 x i1> [[UMUL_NOT_OV]]
 ;
   %t0 = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, %x
   %r = icmp uge <3 x i8> %t0, %y
@@ -43,10 +46,11 @@
 
 define i1 @t3_commutative(i8 %x) {
 ; CHECK-LABEL: @t3_commutative(
-; CHECK-NEXT:    [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[R:%.*]] = icmp ule i8 [[Y]], [[T0]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
+; CHECK-NEXT:    [[UMUL_NOT_OV:%.*]] = xor i1 [[UMUL_OV]], true
+; CHECK-NEXT:    ret i1 [[UMUL_NOT_OV]]
 ;
   %t0 = udiv i8 -1, %x
   %y = call i8 @gen8()
diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll
--- a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll
@@ -8,9 +8,9 @@
 
 define i1 @t0_basic(i8 %x, i8 %y) {
 ; CHECK-LABEL: @t0_basic(
-; CHECK-NEXT:    [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult i8 [[T0]], [[Y:%.*]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
+; CHECK-NEXT:    ret i1 [[UMUL_OV]]
 ;
   %t0 = udiv i8 -1, %x
   %r = icmp ult i8 %t0, %y
@@ -19,9 +19,9 @@
 
 define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @t1_vec(
-; CHECK-NEXT:    [[T0:%.*]] = udiv <2 x i8> <i8 -1, i8 -1>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i8> [[T0]], [[Y:%.*]]
-; CHECK-NEXT:    ret <2 x i1> [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { <2 x i8>, <2 x i1> } @llvm.umul.with.overflow.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { <2 x i8>, <2 x i1> } [[UMUL]], 1
+; CHECK-NEXT:    ret <2 x i1> [[UMUL_OV]]
 ;
   %t0 = udiv <2 x i8> <i8 -1, i8 -1>, %x
   %r = icmp ult <2 x i8> %t0, %y
@@ -30,9 +30,9 @@
 
 define <3 x i1> @t2_vec_undef(<3 x i8> %x, <3 x i8> %y) {
 ; CHECK-LABEL: @t2_vec_undef(
-; CHECK-NEXT:    [[T0:%.*]] = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, [[X:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <3 x i8> [[T0]], [[Y:%.*]]
-; CHECK-NEXT:    ret <3 x i1> [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[UMUL]], 1
+; CHECK-NEXT:    ret <3 x i1> [[UMUL_OV]]
 ;
   %t0 = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, %x
   %r = icmp ult <3 x i8> %t0, %y
@@ -43,10 +43,10 @@
 
 define i1 @t3_commutative(i8 %x) {
 ; CHECK-LABEL: @t3_commutative(
-; CHECK-NEXT:    [[T0:%.*]] = udiv i8 -1, [[X:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = call i8 @gen8()
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[Y]], [[T0]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    [[UMUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 [[Y]])
+; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i8, i1 } [[UMUL]], 1
+; CHECK-NEXT:    ret i1 [[UMUL_OV]]
 ;
   %t0 = udiv i8 -1, %x
   %y = call i8 @gen8()
diff --git a/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll b/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
--- a/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
+++ b/llvm/test/Transforms/PhaseOrdering/unsigned-multiply-overflow-check.ll
@@ -18,17 +18,61 @@
 ; }
 
 define i1 @will_not_overflow(i64 %arg, i64 %arg1) {
-; ALL-LABEL: @will_not_overflow(
-; ALL-NEXT:  bb:
-; ALL-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
-; ALL-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
-; ALL:       bb2:
-; ALL-NEXT:    [[T3:%.*]] = udiv i64 -1, [[ARG]]
-; ALL-NEXT:    [[T4:%.*]] = icmp ult i64 [[T3]], [[ARG1:%.*]]
-; ALL-NEXT:    br label [[BB5]]
-; ALL:       bb5:
-; ALL-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[T4]], [[BB2]] ]
-; ALL-NEXT:    ret i1 [[T6]]
+; SIMPLIFYCFG-LABEL: @will_not_overflow(
+; SIMPLIFYCFG-NEXT:  bb:
+; SIMPLIFYCFG-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; SIMPLIFYCFG-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; SIMPLIFYCFG:       bb2:
+; SIMPLIFYCFG-NEXT:    [[T3:%.*]] = udiv i64 -1, [[ARG]]
+; SIMPLIFYCFG-NEXT:    [[T4:%.*]] = icmp ult i64 [[T3]], [[ARG1:%.*]]
+; SIMPLIFYCFG-NEXT:    br label [[BB5]]
+; SIMPLIFYCFG:       bb5:
+; SIMPLIFYCFG-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[T4]], [[BB2]] ]
+; SIMPLIFYCFG-NEXT:    ret i1 [[T6]]
+;
+; INSTCOMBINEONLY-LABEL: @will_not_overflow(
+; INSTCOMBINEONLY-NEXT:  bb:
+; INSTCOMBINEONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINEONLY-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
+; INSTCOMBINEONLY:       bb2:
+; INSTCOMBINEONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINEONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINEONLY-NEXT:    br label [[BB5]]
+; INSTCOMBINEONLY:       bb5:
+; INSTCOMBINEONLY-NEXT:    [[T6:%.*]] = phi i1 [ false, [[BB:%.*]] ], [ [[UMUL_OV]], [[BB2]] ]
+; INSTCOMBINEONLY-NEXT:    ret i1 [[T6]]
+;
+; INSTCOMBINESIMPLIFYCFGONLY-LABEL: @will_not_overflow(
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:  bb:
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[UMUL_OV]]
+; INSTCOMBINESIMPLIFYCFGONLY-NEXT:    ret i1 [[T6]]
+;
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-LABEL: @will_not_overflow(
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:  bb:
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]]
+; INSTCOMBINESIMPLIFYCFGINSTCOMBINE-NEXT:    ret i1 [[T6]]
+;
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-LABEL: @will_not_overflow(
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:  bb:
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:    [[T6:%.*]] = select i1 [[T0]], i1 false, i1 [[UMUL_OV]]
+; INSTCOMBINESIMPLIFYCFGCOSTLYONLY-NEXT:    ret i1 [[T6]]
+;
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-LABEL: @will_not_overflow(
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:  bb:
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T0:%.*]] = icmp ne i64 [[ARG:%.*]], 0
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    [[T6:%.*]] = and i1 [[UMUL_OV]], [[T0]]
+; INSTCOMBINESIMPLIFYCFGCOSTLYINSTCOMBINE-NEXT:    ret i1 [[T6]]
 ;
 bb:
   %t0 = icmp eq i64 %arg, 0
@@ -65,11 +109,12 @@
 ; INSTCOMBINE-NEXT:    [[T0:%.*]] = icmp eq i64 [[ARG:%.*]], 0
 ; INSTCOMBINE-NEXT:    br i1 [[T0]], label [[BB5:%.*]], label [[BB2:%.*]]
 ; INSTCOMBINE:       bb2:
-; INSTCOMBINE-NEXT:    [[T3:%.*]] = udiv i64 -1, [[ARG]]
-; INSTCOMBINE-NEXT:    [[T4:%.*]] = icmp uge i64 [[T3]], [[ARG1:%.*]]
+; INSTCOMBINE-NEXT:    [[UMUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 [[ARG]], i64 [[ARG1:%.*]])
+; INSTCOMBINE-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i64, i1 } [[UMUL]], 1
+; INSTCOMBINE-NEXT:    [[PHITMP:%.*]] = xor i1 [[UMUL_OV]], true
 ; INSTCOMBINE-NEXT:    br label [[BB5]]
 ; INSTCOMBINE:       bb5:
-; INSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[T4]], [[BB2]] ]
+; INSTCOMBINE-NEXT:    [[T6:%.*]] = phi i1 [ true, [[BB:%.*]] ], [ [[PHITMP]], [[BB2]] ]
 ; INSTCOMBINE-NEXT:    ret i1 [[T6]]
 ;
 bb: