diff --git a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
--- a/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/TruncInstCombine.cpp
@@ -65,6 +65,8 @@
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
+  case Instruction::UDiv:
+  case Instruction::URem:
     Ops.push_back(I->getOperand(0));
     Ops.push_back(I->getOperand(1));
     break;
@@ -134,6 +136,8 @@
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
+  case Instruction::UDiv:
+  case Instruction::URem:
   case Instruction::Select: {
     SmallVector<Value *, 2> Operands;
     getRelevantOperands(I, Operands);
@@ -143,8 +147,7 @@
   default:
     // TODO: Can handle more cases here:
     // 1. shufflevector, extractelement, insertelement
-    // 2. udiv, urem
-    // 3. phi node(and loop handling)
+    // 2. phi node(and loop handling)
     // ...
     return false;
   }
@@ -306,6 +309,19 @@
         return nullptr;
       Itr.second.MinBitWidth = MinBitWidth;
     }
+    if (I->getOpcode() == Instruction::UDiv ||
+        I->getOpcode() == Instruction::URem) {
+      KnownBits KnownLHS = computeKnownBits(I->getOperand(0));
+      unsigned MinBitWidth = KnownLHS.getMaxValue().getActiveBits();
+      if (MinBitWidth >= OrigBitWidth)
+        return nullptr;
+      KnownBits KnownRHS = computeKnownBits(I->getOperand(1));
+      MinBitWidth =
+          std::max(MinBitWidth, KnownRHS.getMaxValue().getActiveBits());
+      if (MinBitWidth >= OrigBitWidth)
+        return nullptr;
+      Itr.second.MinBitWidth = MinBitWidth;
+    }
   }

   // Calculate minimum allowed bit-width allowed for shrinking the currently
@@ -397,7 +413,9 @@
   case Instruction::Xor:
   case Instruction::Shl:
   case Instruction::LShr:
-  case Instruction::AShr: {
+  case Instruction::AShr:
+  case Instruction::UDiv:
+  case Instruction::URem: {
     Value *LHS = getReducedOperand(I->getOperand(0), SclTy);
     Value *RHS = getReducedOperand(I->getOperand(1), SclTy);
     Res = Builder.CreateBinOp((Instruction::BinaryOps)Opc, LHS, RHS);
diff --git a/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll b/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
--- a/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/trunc_udivrem.ll
@@ -3,10 +3,9 @@

 define i16 @udiv_one_arg(i8 %x) {
 ; CHECK-LABEL: @udiv_one_arg(
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[ZEXT]], 42
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[DIV]] to i16
-; CHECK-NEXT:    ret i16 [[TRUNC]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i16 [[ZEXT]], 42
+; CHECK-NEXT:    ret i16 [[DIV]]
 ;
   %zext = zext i8 %x to i32
   %div = udiv i32 %zext, 42
@@ -16,11 +15,8 @@

 define i16 @udiv_two_args(i16 %x, i16 %y) {
 ; CHECK-LABEL: @udiv_two_args(
-; CHECK-NEXT:    [[ZEXTX:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ZEXTY:%.*]] = zext i16 [[X]] to i32
-; CHECK-NEXT:    [[I0:%.*]] = udiv i32 [[ZEXTX]], [[ZEXTY]]
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[I0]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; CHECK-NEXT:    [[I0:%.*]] = udiv i16 [[X:%.*]], [[X]]
+; CHECK-NEXT:    ret i16 [[I0]]
 ;
   %zextx = zext i16 %x to i32
   %zexty = zext i16 %x to i32
@@ -29,6 +25,7 @@
   ret i16 %r
 }

+; Negative test
 define i16 @udiv_big_const(i8 %x) {
 ; CHECK-LABEL: @udiv_big_const(
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
@@ -44,10 +41,9 @@

 define <2 x i16> @udiv_vector(<2 x i8> %x) {
 ; CHECK-LABEL: @udiv_vector(
-; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[S:%.*]] = udiv <2 x i32> [[Z]], <i32 42, i32 42>
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i32> [[S]] to <2 x i16>
-; CHECK-NEXT:    ret <2 x i16> [[T]]
+; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[S:%.*]] = udiv <2 x i16> [[Z]], <i16 42, i16 42>
+; CHECK-NEXT:    ret <2 x i16> [[S]]
 ;
   %z = zext <2 x i8> %x to <2 x i32>
   %s = udiv <2 x i32> %z, <i32 42, i32 42>
@@ -55,6 +51,7 @@
   ret <2 x i16> %t
 }

+; Negative test: can only fold to <2 x i16>, requiring new vector type
 define <2 x i8> @udiv_vector_need_new_vector_type(<2 x i8> %x) {
 ; CHECK-LABEL: @udiv_vector_need_new_vector_type(
 ; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -68,6 +65,7 @@
   ret <2 x i8> %t
 }

+; Negative test
 define <2 x i16> @udiv_vector_big_const(<2 x i8> %x) {
 ; CHECK-LABEL: @udiv_vector_big_const(
 ; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -83,11 +81,8 @@

 define i16 @udiv_exact(i16 %x, i16 %y) {
 ; CHECK-LABEL: @udiv_exact(
-; CHECK-NEXT:    [[ZEXTX:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ZEXTY:%.*]] = zext i16 [[X]] to i32
-; CHECK-NEXT:    [[I0:%.*]] = udiv exact i32 [[ZEXTX]], [[ZEXTY]]
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[I0]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; CHECK-NEXT:    [[I0:%.*]] = udiv exact i16 [[X:%.*]], [[X]]
+; CHECK-NEXT:    ret i16 [[I0]]
 ;
   %zextx = zext i16 %x to i32
   %zexty = zext i16 %x to i32
@@ -99,10 +94,9 @@

 define i16 @urem_one_arg(i8 %x) {
 ; CHECK-LABEL: @urem_one_arg(
-; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
-; CHECK-NEXT:    [[DIV:%.*]] = urem i32 [[ZEXT]], 42
-; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[DIV]] to i16
-; CHECK-NEXT:    ret i16 [[TRUNC]]
+; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i16
+; CHECK-NEXT:    [[DIV:%.*]] = urem i16 [[ZEXT]], 42
+; CHECK-NEXT:    ret i16 [[DIV]]
 ;
   %zext = zext i8 %x to i32
   %div = urem i32 %zext, 42
@@ -112,11 +106,8 @@

 define i16 @urem_two_args(i16 %x, i16 %y) {
 ; CHECK-LABEL: @urem_two_args(
-; CHECK-NEXT:    [[ZEXTX:%.*]] = zext i16 [[X:%.*]] to i32
-; CHECK-NEXT:    [[ZEXTY:%.*]] = zext i16 [[X]] to i32
-; CHECK-NEXT:    [[I0:%.*]] = urem i32 [[ZEXTX]], [[ZEXTY]]
-; CHECK-NEXT:    [[R:%.*]] = trunc i32 [[I0]] to i16
-; CHECK-NEXT:    ret i16 [[R]]
+; CHECK-NEXT:    [[I0:%.*]] = urem i16 [[X:%.*]], [[X]]
+; CHECK-NEXT:    ret i16 [[I0]]
 ;
   %zextx = zext i16 %x to i32
   %zexty = zext i16 %x to i32
@@ -125,6 +116,7 @@
   ret i16 %r
 }

+; Negative test
 define i16 @urem_big_const(i8 %x) {
 ; CHECK-LABEL: @urem_big_const(
 ; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[X:%.*]] to i32
@@ -140,10 +132,9 @@

 define <2 x i16> @urem_vector(<2 x i8> %x) {
 ; CHECK-LABEL: @urem_vector(
-; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[S:%.*]] = urem <2 x i32> [[Z]], <i32 42, i32 42>
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i32> [[S]] to <2 x i16>
-; CHECK-NEXT:    ret <2 x i16> [[T]]
+; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i16>
+; CHECK-NEXT:    [[S:%.*]] = urem <2 x i16> [[Z]], <i16 42, i16 42>
+; CHECK-NEXT:    ret <2 x i16> [[S]]
 ;
   %z = zext <2 x i8> %x to <2 x i32>
   %s = urem <2 x i32> %z, <i32 42, i32 42>
@@ -151,6 +142,7 @@
   ret <2 x i16> %t
 }

+; Negative test: can only fold to <2 x i16>, requiring new vector type
 define <2 x i8> @urem_vector_need_new_vector_type(<2 x i8> %x) {
 ; CHECK-LABEL: @urem_vector_need_new_vector_type(
 ; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
@@ -164,6 +156,7 @@
   ret <2 x i8> %t
 }

+; Negative test
 define <2 x i16> @urem_vector_big_const(<2 x i8> %x) {
 ; CHECK-LABEL: @urem_vector_big_const(
 ; CHECK-NEXT:    [[Z:%.*]] = zext <2 x i8> [[X:%.*]] to <2 x i32>
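
Unlike the bitwise and shift cases already handled by this pass, `udiv` and `urem` cannot be narrowed based on the demanded bits of the result alone: high bits of either operand can change even the lowest bit of the quotient or remainder. The patch therefore shrinks them only when known-bits analysis proves both operands fit in the narrower type, taking `KnownBits::getMaxValue().getActiveBits()` for each operand and giving up (`return nullptr`) as soon as that bound reaches the original width. Below is a minimal standalone sketch of the invariant the fold relies on, written in plain C++ rather than against the LLVM API; the types and the divisor 42 mirror the `@udiv_one_arg`/`@urem_one_arg` tests, and the exhaustive loop is purely illustrative:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Mirrors @udiv_one_arg / @urem_one_arg: an i8 value zero-extended and
  // divided by 42. Both operands have at most 8 active bits, so i16 is
  // already wide enough to reproduce the i32 computation exactly.
  for (uint32_t X = 0; X <= 0xFF; ++X) {
    uint32_t WideDiv = X / 42u; // udiv i32 (zext i8 %x to i32), 42
    uint32_t WideRem = X % 42u; // urem i32 (zext i8 %x to i32), 42

    uint16_t NarrowDiv = static_cast<uint16_t>(X) / static_cast<uint16_t>(42);
    uint16_t NarrowRem = static_cast<uint16_t>(X) % static_cast<uint16_t>(42);

    // Truncating the wide result equals computing at the narrow width, which
    // is what lets TruncInstCombine sink the trunc past the udiv/urem.
    assert(static_cast<uint16_t>(WideDiv) == NarrowDiv);
    assert(static_cast<uint16_t>(WideRem) == NarrowRem);
  }
  return 0;
}
```

Note that the new bit-width code checks the dividend's bound before running the second `computeKnownBits` query, so an LHS that already needs the full width exits early, and both comparisons use `>=` so the transform fires only when the reduced width is strictly smaller than the original.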