Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -969,19 +969,6 @@
   return nullptr;
 }
 
-/// dyn_castZExtVal - Checks if V is a zext or constant that can
-/// be truncated to Ty without losing bits.
-static Value *dyn_castZExtVal(Value *V, Type *Ty) {
-  if (ZExtInst *Z = dyn_cast<ZExtInst>(V)) {
-    if (Z->getSrcTy() == Ty)
-      return Z->getOperand(0);
-  } else if (ConstantInt *C = dyn_cast<ConstantInt>(V)) {
-    if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
-      return ConstantExpr::getTrunc(C, Ty);
-  }
-  return nullptr;
-}
-
 namespace {
 const unsigned MaxDepth = 6;
 typedef Instruction *(*FoldUDivOperandCb)(Value *Op0, Value *Op1,
@@ -1095,6 +1082,43 @@
   return 0;
 }
 
+/// If we have zero-extended operands of an unsigned div or rem, we may be able
+/// to narrow the operation (sink the zext below the math).
+static Instruction *narrowUDivURem(BinaryOperator &I,
+                                   InstCombiner::BuilderTy &Builder) {
+  Instruction::BinaryOps Opcode = I.getOpcode();
+  Value *N = I.getOperand(0);
+  Value *D = I.getOperand(1);
+  Type *Ty = I.getType();
+  Value *X, *Y;
+  if (match(N, m_ZExt(m_Value(X))) && match(D, m_ZExt(m_Value(Y))) &&
+      X->getType() == Y->getType() && (N->hasOneUse() || D->hasOneUse())) {
+    // udiv (zext X), (zext Y) --> zext (udiv X, Y)
+    // urem (zext X), (zext Y) --> zext (urem X, Y)
+    Value *NarrowOp = Builder.CreateBinOp(Opcode, X, Y);
+    return new ZExtInst(NarrowOp, Ty);
+  }
+
+  Constant *C;
+  if ((match(N, m_OneUse(m_ZExt(m_Value(X)))) && match(D, m_Constant(C))) ||
+      (match(D, m_OneUse(m_ZExt(m_Value(X)))) && match(N, m_Constant(C)))) {
+    // If the constant is the same in the smaller type, use the narrow version.
+    Constant *TruncC = ConstantExpr::getTrunc(C, X->getType());
+    if (ConstantExpr::getZExt(TruncC, Ty) != C)
+      return nullptr;
+
+    // udiv (zext X), C --> zext (udiv X, C')
+    // urem (zext X), C --> zext (urem X, C')
+    // udiv C, (zext X) --> zext (udiv C', X)
+    // urem C, (zext X) --> zext (urem C', X)
+    Value *NarrowOp = isa<Constant>(D) ? Builder.CreateBinOp(Opcode, X, TruncC)
+                                       : Builder.CreateBinOp(Opcode, TruncC, X);
+    return new ZExtInst(NarrowOp, Ty);
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
@@ -1127,12 +1151,8 @@
     }
   }
 
-  // (zext A) udiv (zext B) --> zext (A udiv B)
-  if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
-    if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
-      return new ZExtInst(
-          Builder.CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", I.isExact()),
-          I.getType());
+  if (Instruction *NarrowDiv = narrowUDivURem(I, Builder))
+    return NarrowDiv;
 
   // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
   SmallVector<UDivFoldAction, 6> UDivActions;
@@ -1477,11 +1497,8 @@
   if (Instruction *common = commonIRemTransforms(I))
     return common;
 
-  // (zext A) urem (zext B) --> zext (A urem B)
-  if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
-    if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
-      return new ZExtInst(Builder.CreateURem(ZOp0->getOperand(0), ZOp1),
-                          I.getType());
+  if (Instruction *NarrowRem = narrowUDivURem(I, Builder))
+    return NarrowRem;
 
   // X urem Y -> X and Y-1, where Y is a power of 2,
   if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) {
Index: test/Transforms/InstCombine/udivrem-change-width.ll
===================================================================
--- test/Transforms/InstCombine/udivrem-change-width.ll
+++ test/Transforms/InstCombine/udivrem-change-width.ll
@@ -79,10 +79,9 @@
 ; CHECK-LABEL: @udiv_i32_multiuse(
 ; CHECK-NEXT:    [[ZA:%.*]] = zext i8 %a to i32
 ; CHECK-NEXT:    [[ZB:%.*]] = zext i8 %b to i32
-; CHECK-NEXT:    [[DIV:%.*]] = udiv i8 %a, %b
-; CHECK-NEXT:    [[UDIV:%.*]] = zext i8 [[DIV]] to i32
+; CHECK-NEXT:    [[UDIV:%.*]] = udiv i32 [[ZA]], [[ZB]]
 ; CHECK-NEXT:    [[EXTRA_USES:%.*]] = add nuw nsw i32 [[ZA]], [[ZB]]
-; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw i32 [[EXTRA_USES]], [[UDIV]]
+; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw i32 [[UDIV]], [[EXTRA_USES]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %za = zext i8 %a to i32
@@ -133,10 +132,9 @@
 ; CHECK-LABEL: @urem_i32_multiuse(
 ; CHECK-NEXT:    [[ZA:%.*]] = zext i8 %a to i32
 ; CHECK-NEXT:    [[ZB:%.*]] = zext i8 %b to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = urem i8 %a, %b
-; CHECK-NEXT:    [[UREM:%.*]] = zext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[UREM:%.*]] = urem i32 [[ZA]], [[ZB]]
 ; CHECK-NEXT:    [[EXTRA_USES:%.*]] = add nuw nsw i32 [[ZA]], [[ZB]]
-; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw i32 [[EXTRA_USES]], [[UREM]]
+; CHECK-NEXT:    [[R:%.*]] = mul nuw nsw i32 [[UREM]], [[EXTRA_USES]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %za = zext i8 %a to i32
@@ -172,8 +170,8 @@
 
 define <2 x i32> @udiv_i32_c_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @udiv_i32_c_vec(
-; CHECK-NEXT:    [[ZA:%.*]] = zext <2 x i8> %a to <2 x i32>
-; CHECK-NEXT:    [[UDIV:%.*]] = udiv <2 x i32> [[ZA]],
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv <2 x i8> %a,
+; CHECK-NEXT:    [[UDIV:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[UDIV]]
 ;
   %za = zext <2 x i8> %a to <2 x i32>
@@ -184,9 +182,8 @@
 define i32 @udiv_i32_c_multiuse(i8 %a) {
 ; CHECK-LABEL: @udiv_i32_c_multiuse(
 ; CHECK-NEXT:    [[ZA:%.*]] = zext i8 %a to i32
-; CHECK-NEXT:    [[DIV:%.*]] = udiv i8 %a, 10
-; CHECK-NEXT:    [[UDIV:%.*]] = zext i8 [[DIV]] to i32
-; CHECK-NEXT:    [[EXTRA_USE:%.*]] = add nuw nsw i32 [[ZA]], [[UDIV]]
+; CHECK-NEXT:    [[UDIV:%.*]] = udiv i32 [[ZA]], 10
+; CHECK-NEXT:    [[EXTRA_USE:%.*]] = add nuw nsw i32 [[UDIV]], [[ZA]]
 ; CHECK-NEXT:    ret i32 [[EXTRA_USE]]
 ;
   %za = zext i8 %a to i32
@@ -219,8 +216,8 @@
 
 define <2 x i32> @urem_i32_c_vec(<2 x i8> %a) {
 ; CHECK-LABEL: @urem_i32_c_vec(
-; CHECK-NEXT:    [[ZA:%.*]] = zext <2 x i8> %a to <2 x i32>
-; CHECK-NEXT:    [[UREM:%.*]] = urem <2 x i32> [[ZA]],
+; CHECK-NEXT:    [[TMP1:%.*]] = urem <2 x i8> %a,
+; CHECK-NEXT:    [[UREM:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
 ; CHECK-NEXT:    ret <2 x i32> [[UREM]]
 ;
   %za = zext <2 x i8> %a to <2 x i32>
@@ -231,9 +228,8 @@
 define i32 @urem_i32_c_multiuse(i8 %a) {
 ; CHECK-LABEL: @urem_i32_c_multiuse(
 ; CHECK-NEXT:    [[ZA:%.*]] = zext i8 %a to i32
-; CHECK-NEXT:    [[TMP1:%.*]] = urem i8 %a, 10
-; CHECK-NEXT:    [[UREM:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT:    [[EXTRA_USE:%.*]] = add nuw nsw i32 [[ZA]], [[UREM]]
+; CHECK-NEXT:    [[UREM:%.*]] = urem i32 [[ZA]], 10
+; CHECK-NEXT:    [[EXTRA_USE:%.*]] = add nuw nsw i32 [[UREM]], [[ZA]]
 ; CHECK-NEXT:    ret i32 [[EXTRA_USE]]
 ;
   %za = zext i8 %a to i32
@@ -255,8 +251,8 @@
 
 define i32 @udiv_c_i32(i8 %a) {
 ; CHECK-LABEL: @udiv_c_i32(
-; CHECK-NEXT:    [[ZA:%.*]] = zext i8 %a to i32
-; CHECK-NEXT:    [[UDIV:%.*]] = udiv i32 10, [[ZA]]
+; CHECK-NEXT:    [[TMP1:%.*]] = udiv i8 10, %a
+; CHECK-NEXT:    [[UDIV:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[UDIV]]
 ;
   %za = zext i8 %a to i32
@@ -266,8 +262,8 @@
 
 define i32 @urem_c_i32(i8 %a) {
 ; CHECK-LABEL: @urem_c_i32(
-; CHECK-NEXT:    [[ZA:%.*]] = zext i8 %a to i32
-; CHECK-NEXT:    [[UREM:%.*]] = urem i32 10, [[ZA]]
+; CHECK-NEXT:    [[TMP1:%.*]] = urem i8 10, %a
+; CHECK-NEXT:    [[UREM:%.*]] = zext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    ret i32 [[UREM]]
 ;
   %za = zext i8 %a to i32