diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3254,15 +3254,20 @@
   if (!WO)
     return nullptr;
 
-  // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
   Intrinsic::ID OvID = WO->getIntrinsicID();
   const APInt *C = nullptr;
   if (match(WO->getRHS(), m_APIntAllowUndef(C))) {
-    if (*EV.idx_begin() == 0 &&
-        (OvID == Intrinsic::smul_with_overflow ||
-         OvID == Intrinsic::umul_with_overflow) &&
-        C->isAllOnes()) {
-      return BinaryOperator::CreateNeg(WO->getArgOperand(0));
+    if (*EV.idx_begin() == 0 && (OvID == Intrinsic::smul_with_overflow ||
+                                 OvID == Intrinsic::umul_with_overflow)) {
+      // extractvalue (any_mul_with_overflow X, -1), 0 --> -X
+      if (C->isAllOnes())
+        return BinaryOperator::CreateNeg(WO->getLHS());
+      // extractvalue (any_mul_with_overflow X, 2^n), 0 --> X << n
+      if (C->isPowerOf2()) {
+        return BinaryOperator::CreateShl(
+            WO->getLHS(),
+            ConstantInt::get(WO->getLHS()->getType(), C->logBase2()));
+      }
     }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/with_overflow.ll b/llvm/test/Transforms/InstCombine/with_overflow.ll
--- a/llvm/test/Transforms/InstCombine/with_overflow.ll
+++ b/llvm/test/Transforms/InstCombine/with_overflow.ll
@@ -254,11 +254,10 @@
 
 define i32 @umultest4(i32 %n) nounwind {
 ; CHECK-LABEL: @umultest4(
-; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[N:%.*]], 1
-; CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[SHR]], i32 4)
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[MUL]], 1
-; CHECK-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[MUL]], 0
-; CHECK-NEXT:    [[RET:%.*]] = select i1 [[OV]], i32 -1, i32 [[RES]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[N:%.*]], 1
+; CHECK-NEXT:    [[RES:%.*]] = and i32 [[TMP1]], -4
+; CHECK-NEXT:    [[OV_INV:%.*]] = icmp sgt i32 [[N]], -1
+; CHECK-NEXT:    [[RET:%.*]] = select i1 [[OV_INV]], i32 [[RES]], i32 -1
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %shr = lshr i32 %n, 1
@@ -890,13 +889,10 @@
   ret i8 %r
 }
 
-; TODO: this could be 'shl' and 'icmp'
-
 define i8 @umul_2(i8 %x, i1* %p) {
 ; CHECK-LABEL: @umul_2(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 2)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[OV:%.*]] = icmp slt i8 [[X]], 0
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -909,9 +905,8 @@
 
 define i8 @umul_8(i8 %x, i1* %p) {
 ; CHECK-LABEL: @umul_8(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 8)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 3
+; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i8 [[X]], 31
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -924,9 +919,8 @@
 
 define i8 @umul_64(i8 %x, i1* %p) {
 ; CHECK-LABEL: @umul_64(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 64)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 6
+; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i8 [[X]], 3
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -949,6 +943,8 @@
   ret i8 %r
 }
 
+; Negative test: not PowerOf2
+
 define i8 @umul_3(i8 %x, i1* %p) {
 ; CHECK-LABEL: @umul_3(
 ; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[X:%.*]], i8 3)
@@ -966,9 +962,9 @@
 
 define i8 @smul_4(i8 %x, i1* %p) {
 ; CHECK-LABEL: @smul_4(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 4)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], -32
+; CHECK-NEXT:    [[OV:%.*]] = icmp ult i8 [[TMP1]], -64
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -981,9 +977,9 @@
 
 define i8 @smul_16(i8 %x, i1* %p) {
 ; CHECK-LABEL: @smul_16(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 16)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], -8
+; CHECK-NEXT:    [[OV:%.*]] = icmp ult i8 [[TMP1]], -16
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -996,9 +992,9 @@
 
 define i8 @smul_32(i8 %x, i1* %p) {
 ; CHECK-LABEL: @smul_32(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 32)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X]], -4
+; CHECK-NEXT:    [[OV:%.*]] = icmp ult i8 [[TMP1]], -8
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -1011,9 +1007,8 @@
 
 define i8 @smul_128(i8 %x, i1* %p) {
 ; CHECK-LABEL: @smul_128(
-; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 -128)
-; CHECK-NEXT:    [[R:%.*]] = extractvalue { i8, i1 } [[M]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i8, i1 } [[M]], 1
+; CHECK-NEXT:    [[R:%.*]] = shl i8 [[X:%.*]], 7
+; CHECK-NEXT:    [[OV:%.*]] = icmp ugt i8 [[X]], 1
 ; CHECK-NEXT:    store i1 [[OV]], i1* [[P:%.*]], align 1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
@@ -1024,6 +1019,8 @@
   ret i8 %r
 }
 
+; Negative test: not PowerOf2
+
 define i8 @smul_7(i8 %x, i1* %p) {
 ; CHECK-LABEL: @smul_7(
 ; CHECK-NEXT:    [[M:%.*]] = tail call { i8, i1 } @llvm.smul.with.overflow.i8(i8 [[X:%.*]], i8 7)
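
For readers skimming the patch, a minimal IR sketch of the new power-of-two fold follows. It is not part of the patch; the function name is hypothetical, and the expected output simply restates the updated @umul_8 CHECK lines above in source form.

; Hypothetical input (mirrors @umul_8 in the test file above):
declare { i8, i1 } @llvm.umul.with.overflow.i8(i8, i8)

define i8 @shl_fold_demo(i8 %x, i1* %p) {
  %m = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %x, i8 8)
  %r = extractvalue { i8, i1 } %m, 0
  %ov = extractvalue { i8, i1 } %m, 1
  store i1 %ov, i1* %p
  ret i8 %r
}

; After instcombine, per the CHECK lines above, the value extract becomes
;   %r = shl i8 %x, 3
; and the overflow extract is folded separately by existing logic to
;   %ov = icmp ugt i8 %x, 31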