diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3256,11 +3256,14 @@ // extractvalue (any_mul_with_overflow X, -1), 0 --> -X Intrinsic::ID OvID = WO->getIntrinsicID(); - if (*EV.idx_begin() == 0 && - (OvID == Intrinsic::smul_with_overflow || - OvID == Intrinsic::umul_with_overflow) && - match(WO->getArgOperand(1), m_AllOnes())) { - return BinaryOperator::CreateNeg(WO->getArgOperand(0)); + const APInt *C = nullptr; + if (match(WO->getRHS(), m_APIntAllowUndef(C))) { + if (*EV.idx_begin() == 0 && + (OvID == Intrinsic::smul_with_overflow || + OvID == Intrinsic::umul_with_overflow) && + C->isAllOnes()) { + return BinaryOperator::CreateNeg(WO->getArgOperand(0)); + } } // We're extracting from an overflow intrinsic. See if we're the only user. @@ -3289,8 +3292,7 @@ // If only the overflow result is used, and the right hand side is a // constant (or constant splat), we can remove the intrinsic by directly // checking for overflow. - const APInt *C; - if (match(WO->getRHS(), m_APInt(C))) { + if (C) { // Compute the no-wrap range for LHS given RHS=C, then construct an // equivalent icmp, potentially using an offset. ConstantRange NWR = ConstantRange::makeExactNoWrapRegion( diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll --- a/llvm/test/Transforms/InstCombine/with_overflow.ll +++ b/llvm/test/Transforms/InstCombine/with_overflow.ll @@ -793,13 +793,10 @@ ret <4 x i8> %r } -; TODO: partly failed to match vector constant with poison element - define <4 x i8> @smul_neg1_vec_poison(<4 x i8> %x, <4 x i1>* %p) { ; CHECK-LABEL: @smul_neg1_vec_poison( -; CHECK-NEXT: [[M:%.*]] = tail call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> [[X:%.*]], <4 x i8> ) -; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X]] -; CHECK-NEXT: [[OV:%.*]] = extractvalue { <4 x i8>, <4 x i1> } [[M]], 1 +; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] +; CHECK-NEXT: [[OV:%.*]] = icmp eq <4 x i8> [[X]], ; CHECK-NEXT: store <4 x i1> [[OV]], <4 x i1>* [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -853,13 +850,10 @@ ret <4 x i8> %r } -; TODO: partly failed to match vector constant with poison element - define <4 x i8> @umul_neg1_vec_poison(<4 x i8> %x, <4 x i1>* %p) { ; CHECK-LABEL: @umul_neg1_vec_poison( -; CHECK-NEXT: [[M:%.*]] = tail call { <4 x i8>, <4 x i1> } @llvm.umul.with.overflow.v4i8(<4 x i8> [[X:%.*]], <4 x i8> ) -; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X]] -; CHECK-NEXT: [[OV:%.*]] = extractvalue { <4 x i8>, <4 x i1> } [[M]], 1 +; CHECK-NEXT: [[R:%.*]] = sub <4 x i8> zeroinitializer, [[X:%.*]] +; CHECK-NEXT: [[OV:%.*]] = icmp ugt <4 x i8> [[X]], ; CHECK-NEXT: store <4 x i1> [[OV]], <4 x i1>* [[P:%.*]], align 1 ; CHECK-NEXT: ret <4 x i8> [[R]] ; @@ -870,6 +864,17 @@ ret <4 x i8> %r } +define <4 x i1> @smul_not_neg1_vec(<4 x i8> %x) { +; CHECK-LABEL: @smul_not_neg1_vec( +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X:%.*]], +; CHECK-NEXT: [[OV:%.*]] = icmp ult <4 x i8> [[TMP1]], +; CHECK-NEXT: ret <4 x i1> [[OV]] +; + %m = call { <4 x i8>, <4 x i1> } @llvm.smul.with.overflow.v4i8(<4 x i8> %x, <4 x i8> ) + %ov = extractvalue { <4 x i8>, <4 x i1> } %m, 1 + ret <4 x i1> %ov +} + ; TODO: this could be 'shl' and 'icmp' define i8 @umul_2(i8 %x, i1* %p) {