Index: llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp =================================================================== --- llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -58,6 +58,7 @@ STATISTIC(NumReturns, "Number of return values propagated"); STATISTIC(NumDeadCases, "Number of switch cases removed"); STATISTIC(NumSDivs, "Number of sdiv converted to udiv"); +STATISTIC(NumUDivs, "Number of udivs whose width was decreased"); STATISTIC(NumAShrs, "Number of ashr converted to lshr"); STATISTIC(NumSRems, "Number of srem converted to urem"); STATISTIC(NumOverflows, "Number of overflow checks removed"); @@ -444,6 +445,57 @@ return true; } +// Tries to find the smallest power-of-two bit width greater than 8 bits which +// is sufficient to hold all of the operands of SDI (interpreted as uints). +static Optional +smallestPowerOf2WidthForOperandsOf(BinaryOperator *SDI, LazyValueInfo *LVI) { + Optional Result; + auto OrigWidth = SDI->getType()->getIntegerBitWidth(); + for (uint64_t Width = PowerOf2Floor(OrigWidth - 1); Width >= 8; Width /= 2) { + Constant *Max = ConstantInt::get( + SDI->getType(), APInt::getAllOnesValue(Width).zext(OrigWidth)); + if (all_of(SDI->operands(), [&](Value *Operand) { + return LVI->getPredicateAt(ICmpInst::ICMP_ULE, Operand, Max, SDI) == + LazyValueInfo::True; + })) + Result = Width; + else + break; + } + return Result; +} + +/// Try to shrink a udiv/urem's width down to the smallest power of two that's +/// sufficient to contain its operands. +static bool processUDivOrURem(BinaryOperator *SDI, LazyValueInfo *LVI) { + assert(SDI->getOpcode() == Instruction::UDiv || + SDI->getOpcode() == Instruction::URem); + if (SDI->getType()->isVectorTy()) + return false; + + Optional TruncBitWidth = + smallestPowerOf2WidthForOperandsOf(SDI, LVI); + if (!TruncBitWidth) + return false; + + ++NumUDivs; + auto *TruncTy = Type::getIntNTy(SDI->getContext(), *TruncBitWidth); + auto *LHS = CastInst::Create(Instruction::Trunc, SDI->getOperand(0), TruncTy, + SDI->getName() + ".lhs.trunc", SDI); + auto *RHS = CastInst::Create(Instruction::Trunc, SDI->getOperand(1), TruncTy, + SDI->getName() + ".rhs.trunc", SDI); + auto *BO = + BinaryOperator::Create(SDI->getOpcode(), LHS, RHS, SDI->getName(), SDI); + auto *Zext = CastInst::Create(Instruction::ZExt, BO, SDI->getType(), + SDI->getName() + ".zext", SDI); + if (BO->getOpcode() == Instruction::UDiv) + BO->setIsExact(SDI->isExact()); + + SDI->replaceAllUsesWith(Zext); + SDI->eraseFromParent(); + return true; +} + /// See if LazyValueInfo's ability to exploit edge conditions or range /// information is sufficient to prove the both operands of this SDiv are /// positive. If this is the case, replace the SDiv with a UDiv. Even for local @@ -460,6 +512,9 @@ SDI->replaceAllUsesWith(BO); SDI->eraseFromParent(); + // Try to simplify our new udiv. + processUDivOrURem(BO, LVI); + return true; } @@ -595,6 +650,10 @@ case Instruction::SDiv: BBChanged |= processSDiv(cast(II), LVI); break; + case Instruction::UDiv: + case Instruction::URem: + BBChanged |= processUDivOrURem(cast(II), LVI); + break; case Instruction::AShr: BBChanged |= processAShr(cast(II), LVI); break; Index: llvm/test/Transforms/CorrelatedValuePropagation/udiv.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/CorrelatedValuePropagation/udiv.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s + +; CHECK-LABEL: @test1( +define void @test1(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65535 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i16 + %div = udiv i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test2( +define void @test2(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65536 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i32 %n, 100 + %div = udiv i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test3( +define void @test3(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ult i32 %n, 65535 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i16 + %div = udiv i32 %m, %n + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ule i32 %n, 65536 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: udiv i32 %m, %n + %div = udiv i32 %m, %n + br label %exit + +exit: + ret void +} Index: llvm/test/Transforms/CorrelatedValuePropagation/urem.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/CorrelatedValuePropagation/urem.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -correlated-propagation -S | FileCheck %s + +; CHECK-LABEL: @test1( +define void @test1(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65535 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i16 + %div = urem i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test2( +define void @test2(i32 %n) { +entry: + %cmp = icmp ule i32 %n, 65536 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i32 %n, 100 + %div = urem i32 %n, 100 + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test3( +define void @test3(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ult i32 %n, 65535 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i16 + %div = urem i32 %m, %n + br label %exit + +exit: + ret void +} + +; CHECK-LABEL: @test4( +define void @test4(i32 %m, i32 %n) { +entry: + %cmp1 = icmp ult i32 %m, 65535 + %cmp2 = icmp ule i32 %n, 65536 + %cmp = and i1 %cmp1, %cmp2 + br i1 %cmp, label %bb, label %exit + +bb: +; CHECK: urem i32 %m, %n + %div = urem i32 %m, %n + br label %exit + +exit: + ret void +}