Index: lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -61,6 +61,7 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); +STATISTIC(NumLShrs, "Number of lshrs whose width was decreased"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumOverflows, "Number of overflow checks removed"); @@ -578,6 +579,56 @@ return true; } +static bool processLShr(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::LShr); + if (Instr->getType()->isVectorTy()) + return false; + + const auto OrigWidth = Instr->getType()->getIntegerBitWidth(); + + auto getUnsignedMaxVal = [&Instr, &LVI](unsigned Operand) -> APInt { + BasicBlock *BB = Instr->getParent(); + Value *Value = Instr->getOperand(Operand); + ConstantRange Range = LVI->getConstantRange(Value, BB, Instr); + return Range.getUnsignedMax(); + }; + + // What is maximal value for the first operand? + APInt LHS = getUnsignedMaxVal(/*Operand=*/0); + // If we don't know the range, don't bother going any further. + if (LHS.isAllOnesValue()) + return false; + uint64_t LWidth = LHS.getActiveBits(); + + // What is maximal value for the first operand? Might be a constant. + APInt RHS = getUnsignedMaxVal(/*Operand=*/1); + // The shift amount needs to be less than the bit width of the instruction. + // This also handles cases where shift amount is larger-than 64-bit value. + if (RHS.uge(OrigWidth)) + return false; + // The shift amount needs to be less than the width of the first operand. + // Thus, the "width of the second operand" is the shift amount plus one. + uint64_t RWidth = uint64_t(1) + RHS.getZExtValue(); + + // So what is the biggest width? + uint64_t NewWidth = std::max(LWidth, RWidth); + // Increase it to the next power-of-two. + NewWidth = PowerOf2Ceil(NewWidth); + // Don't shrink below 8 bits wide. + NewWidth = std::max(NewWidth, 8); + + // NewWidth might be greater than OrigWidth. Don't increase the width. + if (NewWidth >= OrigWidth) + return false; + + ++NumLShrs; + auto *BO = CreateTruncatedBO(Instr, NewWidth); + BO->setIsExact(Instr->isExact()); + Instr->eraseFromParent(); + + return true; +} + static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; @@ -594,6 +645,9 @@ SDI->replaceAllUsesWith(BO); SDI->eraseFromParent(); + // Try to process our new lshr. + processLShr(BO, LVI); + return true; } @@ -714,6 +768,9 @@ case Instruction::URem: BBChanged |= processUDivOrURem(cast(II), LVI); break; + case Instruction::LShr: + BBChanged |= processLShr(cast(II), LVI); + break; case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; Index: test/Transforms/CorrelatedValuePropagation/lshr.ll =================================================================== --- test/Transforms/CorrelatedValuePropagation/lshr.ll +++ test/Transforms/CorrelatedValuePropagation/lshr.ll @@ -9,7 +9,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65534 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -32,7 +35,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -52,7 +58,10 @@ define void @const_val1_variant2(i32 %n) { ; CHECK-LABEL: @const_val1_variant2( ; CHECK-NEXT: [[TRUNC:%.*]] = and i32 [[N:%.*]], 65535 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[TRUNC]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNC]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: ret void ; %trunc = and i32 %n, 65535 @@ -89,7 +98,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 15 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 15 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -155,7 +167,10 @@ define void @largeconst(i256 %n) { ; CHECK-LABEL: @largeconst( ; CHECK-NEXT: [[TRUNCN:%.*]] = and i256 [[N:%.*]], 1 -; CHECK-NEXT: [[DIV:%.*]] = lshr i256 [[TRUNCN]], 127 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i256 [[TRUNCN]] to i128 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i256 127 to i128 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i128 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i128 [[DIV1]] to i256 ; CHECK-NEXT: ret void ; %truncn = and i256 %n, 1 @@ -275,7 +290,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -302,7 +320,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -325,7 +346,10 @@ ; CHECK-LABEL: @var_val1_variant2( ; CHECK-NEXT: [[TRUNCM:%.*]] = and i32 [[N:%.*]], 65535 ; CHECK-NEXT: [[TRUNCN:%.*]] = and i32 [[N]], 7 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[TRUNCM]], [[TRUNCN]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNCM]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[TRUNCN]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: ret void ; %truncm = and i32 %n, 65535 @@ -369,7 +393,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -450,7 +477,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i256 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i256 [[M]] to i128 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i256 [[N]] to i128 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i128 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i128 [[DIV1]] to i256 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -674,7 +704,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 [[N]], 6 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 6 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr exact i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -699,7 +732,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr exact i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -724,7 +760,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 1 to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -749,7 +788,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -774,7 +816,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 4095 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 1 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -799,7 +844,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void Index: test/Transforms/PhaseOrdering/udiv-urem-instcombine-vs-cvp.ll =================================================================== --- test/Transforms/PhaseOrdering/udiv-urem-instcombine-vs-cvp.ll +++ test/Transforms/PhaseOrdering/udiv-urem-instcombine-vs-cvp.ll @@ -75,16 +75,19 @@ ; ; CHECK-IC-CVP-LABEL: @poweroftwo( ; CHECK-IC-CVP-NEXT: [[ZA:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-IC-CVP-NEXT: [[UDIV:%.*]] = lshr i64 [[ZA]], 5 +; CHECK-IC-CVP-NEXT: [[UDIV_LHS_TRUNC:%.*]] = trunc i64 [[ZA]] to i32 +; CHECK-IC-CVP-NEXT: [[UDIV_RHS_TRUNC:%.*]] = trunc i64 5 to i32 +; CHECK-IC-CVP-NEXT: [[UDIV1:%.*]] = lshr i32 [[UDIV_LHS_TRUNC]], [[UDIV_RHS_TRUNC]] +; CHECK-IC-CVP-NEXT: [[UDIV_ZEXT:%.*]] = zext i32 [[UDIV1]] to i64 ; CHECK-IC-CVP-NEXT: [[UREM:%.*]] = and i64 [[ZA]], 31 -; CHECK-IC-CVP-NEXT: [[UADD:%.*]] = add nuw nsw i64 [[UDIV]], [[UREM]] +; CHECK-IC-CVP-NEXT: [[UADD:%.*]] = add nuw nsw i64 [[UDIV_ZEXT]], [[UREM]] ; CHECK-IC-CVP-NEXT: ret i64 [[UADD]] ; ; CHECK-IC-CVP-IC-LABEL: @poweroftwo( -; CHECK-IC-CVP-IC-NEXT: [[ZA:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-IC-CVP-IC-NEXT: [[UDIV:%.*]] = lshr i64 [[ZA]], 5 -; CHECK-IC-CVP-IC-NEXT: [[UREM:%.*]] = and i64 [[ZA]], 31 -; CHECK-IC-CVP-IC-NEXT: [[UADD:%.*]] = add nuw nsw i64 [[UDIV]], [[UREM]] +; CHECK-IC-CVP-IC-NEXT: [[UDIV1:%.*]] = lshr i32 [[A:%.*]], 5 +; CHECK-IC-CVP-IC-NEXT: [[TMP1:%.*]] = and i32 [[A]], 31 +; CHECK-IC-CVP-IC-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[UDIV1]], [[TMP1]] +; CHECK-IC-CVP-IC-NEXT: [[UADD:%.*]] = zext i32 [[ADDCONV]] to i64 ; CHECK-IC-CVP-IC-NEXT: ret i64 [[UADD]] ; %za = zext i32 %a to i64