Index: lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -61,6 +61,7 @@ STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); +STATISTIC(NumLShrs, "Number of lshrs whose width was decreased"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumOverflows, "Number of overflow checks removed"); @@ -493,6 +494,21 @@ return true; } +BinaryOperator *CreateTruncatedBO(BinaryOperator *Instr, unsigned NewWidth) { + auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); + auto *LHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(0), + TruncTy, Instr->getName() + ".lhs.trunc", Instr); + auto *RHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(1), + TruncTy, Instr->getName() + ".rhs.trunc", Instr); + auto *BO = BinaryOperator::Create(Instr->getOpcode(), LHS, RHS, + Instr->getName(), Instr); + auto *Zext = CastInst::Create(Instruction::ZExt, BO, Instr->getType(), + Instr->getName() + ".zext", Instr); + Instr->replaceAllUsesWith(Zext); + Instr->eraseFromParent(); + return BO; +} + /// Try to shrink a udiv/urem's width down to the smallest power of two that's /// sufficient to contain its operands. static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { @@ -518,20 +534,13 @@ return false; ++NumUDivs; - auto *TruncTy = Type::getIntNTy(Instr->getContext(), NewWidth); - auto *LHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(0), TruncTy, - Instr->getName() + ".lhs.trunc", Instr); - auto *RHS = CastInst::Create(Instruction::Trunc, Instr->getOperand(1), TruncTy, - Instr->getName() + ".rhs.trunc", Instr); - auto *BO = - BinaryOperator::Create(Instr->getOpcode(), LHS, RHS, Instr->getName(), Instr); - auto *Zext = CastInst::Create(Instruction::ZExt, BO, Instr->getType(), - Instr->getName() + ".zext", Instr); - if (BO->getOpcode() == Instruction::UDiv) - BO->setIsExact(Instr->isExact()); + llvm::Optional IsExact; + if (Instr->getOpcode() == Instruction::UDiv) + IsExact = Instr->isExact(); + auto *BO = CreateTruncatedBO(Instr, NewWidth); + if (IsExact.hasValue()) + BO->setIsExact(IsExact.getValue()); - Instr->replaceAllUsesWith(Zext); - Instr->eraseFromParent(); return true; } @@ -573,6 +582,64 @@ return true; } +static bool processLShr(BinaryOperator *Instr, LazyValueInfo *LVI) { + assert(Instr->getOpcode() == Instruction::LShr); + + if (Instr->getType()->isVectorTy()) + return false; + + auto getMaxVal = [&Instr, &LVI](unsigned Operand) -> APInt { + BasicBlock *BB = Instr->getParent(); + Value *Value = Instr->getOperand(Operand); + ConstantRange Range = LVI->getConstantRange(Value, BB, Instr); + return Range.getUnsignedMax(); + }; + + // What is maximal value for the first operand? + APInt LHS = getMaxVal(/*Operand=*/0); + // If we don't know the range, don't bother going any further. + if (LHS.isAllOnesValue()) + return false; + uint64_t LWidth = LHS.getActiveBits(); + + // What about the shift? This is tricky. + APInt RHS; + // If the shift is constant, then we can just use said constant. + if (auto *RHSConst = dyn_cast(Instr->getOperand(1))) + RHS = RHSConst->getValue(); + else + // Else, what is the maximal value for the second operand? + RHS = getMaxVal(/*Operand=*/1); + + // Avoid unsigned integer overflow. + // Could happen if data type is i64, and there is no data constant range. + if (RHS.isAllOnesValue()) + return false; + + // The shift amount needs to be less than the width of the first operand. + // Thus, the "width of the second operand" is the shift amount plus one. + uint64_t RWidth = uint64_t(1) + RHS.getZExtValue(); + + // So what is the biggest width? + uint64_t NewWidth = std::max(LWidth, RWidth); + // Increase it to the next power-of-two + NewWidth = PowerOf2Ceil(NewWidth); + // Don't shrink below 8 bits wide. + NewWidth = std::max(NewWidth, 8); + + // NewWidth might be greater than OrigWidth. Don't increase the width. + auto OrigWidth = Instr->getType()->getIntegerBitWidth(); + if (NewWidth >= OrigWidth) + return false; + + ++NumLShrs; + const bool IsExact = Instr->isExact(); + auto *BO = CreateTruncatedBO(Instr, NewWidth); + BO->setIsExact(IsExact); + + return true; +} + static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; @@ -589,6 +656,9 @@ SDI->replaceAllUsesWith(BO); SDI->eraseFromParent(); + // Try to process our new lshr. + processLShr(BO, LVI); + return true; } @@ -709,6 +779,9 @@ case Instruction::URem: BBChanged |= processUDivOrURem(cast(II), LVI); break; + case Instruction::LShr: + BBChanged |= processLShr(cast(II), LVI); + break; case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; Index: test/Transforms/CorrelatedValuePropagation/lshr.ll =================================================================== --- test/Transforms/CorrelatedValuePropagation/lshr.ll +++ test/Transforms/CorrelatedValuePropagation/lshr.ll @@ -9,7 +9,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 6 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 6 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -29,7 +32,10 @@ define void @const0_variant2(i32 %n) { ; CHECK-LABEL: @const0_variant2( ; CHECK-NEXT: [[TRUNC:%.*]] = and i32 [[N:%.*]], 65535 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[TRUNC]], 6 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNC]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 6 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: ret void ; %trunc = and i32 %n, 65535 @@ -43,7 +49,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 255 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 7 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 7 to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -66,7 +75,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 255 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 8 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 8 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -89,7 +101,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 255 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 9 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 9 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -207,7 +222,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -230,7 +248,10 @@ ; CHECK-LABEL: @var0_variant2( ; CHECK-NEXT: [[TRUNCM:%.*]] = and i32 [[N:%.*]], 65535 ; CHECK-NEXT: [[TRUNCN:%.*]] = and i32 [[N]], 6 -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[TRUNCM]], [[TRUNCN]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[TRUNCM]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[TRUNCN]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: ret void ; %truncm = and i32 %n, 65535 @@ -247,7 +268,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -274,7 +298,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -301,7 +328,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -498,7 +528,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 65535 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 [[N]], 6 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 6 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr exact i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -523,7 +556,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr exact i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -548,7 +584,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 15 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 1 to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -573,7 +612,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i8 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i8 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i8 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i8 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -598,7 +640,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ule i32 [[N:%.*]], 4095 ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[N]], 1 +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 1 to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void @@ -623,7 +668,10 @@ ; CHECK-NEXT: [[CMP:%.*]] = and i1 [[CMP1]], [[CMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[M]], [[N]] +; CHECK-NEXT: [[DIV_LHS_TRUNC:%.*]] = trunc i32 [[M]] to i16 +; CHECK-NEXT: [[DIV_RHS_TRUNC:%.*]] = trunc i32 [[N]] to i16 +; CHECK-NEXT: [[DIV1:%.*]] = lshr i16 [[DIV_LHS_TRUNC]], [[DIV_RHS_TRUNC]] +; CHECK-NEXT: [[DIV_ZEXT:%.*]] = zext i16 [[DIV1]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void