Index: lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -61,6 +61,7 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); +STATISTIC(NumLShrs, "Number of lshrs whose width was decreased"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumOverflows, "Number of overflow checks removed"); @@ -578,6 +579,64 @@ return true; } +static bool processLShr(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::LShr); + + if (Instr->getType()->isVectorTy()) + return false; + + auto getMaxVal = [&Instr, &LVI](unsigned Operand) -> APInt { + BasicBlock *BB = Instr->getParent(); + Value *Value = Instr->getOperand(Operand); + ConstantRange Range = LVI->getConstantRange(Value, BB, Instr); + return Range.getUnsignedMax(); + }; + + // What is maximal value for the first operand? + APInt LHS = getMaxVal(/*Operand=*/0); + // If we don't know the range, don't bother going any further. + if (LHS.isAllOnesValue()) + return false; + uint64_t LWidth = LHS.getActiveBits(); + + // What about the shift? This is tricky. + APInt RHS; + // If the shift is constant, then we can just use said constant. + if (auto *RHSConst = dyn_cast(Instr->getOperand(1))) + RHS = RHSConst->getValue(); + else + // Else, what is the maximal value for the second operand? + RHS = getMaxVal(/*Operand=*/1); + + // Avoid unsigned integer overflow. + // Could happen if data type is i64, and there is no data on constant range. + if (RHS.isAllOnesValue()) + return false; + + // The shift amount needs to be less than the width of the first operand. + // Thus, the "width of the second operand" is the shift amount plus one. + uint64_t RWidth = uint64_t(1) + RHS.getZExtValue(); + + // So what is the biggest width? + uint64_t NewWidth = std::max(LWidth, RWidth); + // Increase it to the next power-of-two. + NewWidth = PowerOf2Ceil(NewWidth); + // Don't shrink below 8 bits wide. + NewWidth = std::max(NewWidth, 8); + + // NewWidth might be greater than OrigWidth. Don't increase the width. + auto OrigWidth = Instr->getType()->getIntegerBitWidth(); + if (NewWidth >= OrigWidth) + return false; + + ++NumLShrs; + auto *BO = CreateTruncatedBO(Instr, NewWidth); + BO->setIsExact(Instr->isExact()); + Instr->eraseFromParent(); + + return true; +} + static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; @@ -594,6 +653,9 @@ SDI->replaceAllUsesWith(BO); SDI->eraseFromParent(); + // Try to process our new lshr. + processLShr(BO, LVI); + return true; } @@ -714,6 +776,9 @@ case Instruction::URem: BBChanged |= processUDivOrURem(cast(II), LVI); break; + case Instruction::LShr: + BBChanged |= processLShr(cast(II), LVI); + break; case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; Index: test/Transforms/CorrelatedValuePropagation/lshr.ll =================================================================== --- test/Transforms/CorrelatedValuePropagation/lshr.ll +++ test/Transforms/CorrelatedValuePropagation/lshr.ll @@ -9,7 +9,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65534 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -32,7 +35,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -52,7 +58,10 @@ define void @const_val1_variant2(i32 %n) { ; CHECK-LABEL: @const_val1_variant2( ; CHECK-NEXT: [[TRUNC:%.*]] = and i32 [[N:%.*]], 65535 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[TRUNC]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNC]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: ret void ; %trunc = and i32 %n, 65535 @@ -89,7 +98,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 15 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 15 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -253,7 +265,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -280,7 +295,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -303,7 +321,10 @@ ; CHECK-LABEL: @var_val1_variant2( ; CHECK-NEXT: [[TRUNCM:%.*]] = and i32 [[N:%.*]], 65535 ; CHECK-NEXT: [[TRUNCN:%.*]] = and i32 [[N]], 7 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[TRUNCM]], [[TRUNCN]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNCM]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[TRUNCN]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: ret void ; %truncm = and i32 %n, 65535 @@ -347,7 +368,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -598,7 +622,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 [[N]], 6 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 6 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr exact i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -623,7 +650,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr exact i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -648,7 +678,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 1 to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -673,7 +706,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -698,7 +734,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 4095 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 1 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -723,7 +762,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void Index: test/Transforms/PhaseOrdering/udiv-urem-instcombine-vs-cvp.ll =================================================================== --- test/Transforms/PhaseOrdering/udiv-urem-instcombine-vs-cvp.ll +++ test/Transforms/PhaseOrdering/udiv-urem-instcombine-vs-cvp.ll @@ -67,9 +67,12 @@ ; ; CHECK-CVP-AFTER-INSTCOMBINE-LABEL: @poweroftwo( ; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[ZA:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UDIV:%.*]] = lshr i64 [[ZA]], 5 +; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i64 [[ZA]] to i32 +; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UDIV_RHS_TRUNC:%.*]] = trunc i64 5 to i32 +; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UDIV1:%.*]] = lshr i32 [[UDIV_LHS_TRUNC]], [[UDIV_RHS_TRUNC]] +; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UDIV_ZEXT:%.*]] = zext i32 [[UDIV1]] to i64 ; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UREM:%.*]] = and i64 [[ZA]], 31 -; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UADD:%.*]] = add nuw nsw i64 [[UDIV]], [[UREM]] +; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: [[UADD:%.*]] = add nuw nsw i64 [[UDIV_ZEXT]], [[UREM]] ; CHECK-CVP-AFTER-INSTCOMBINE-NEXT: ret i64 [[UADD]] ; %za = zext i32 %a to i64