Index: llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -373,11 +373,12 @@ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); assert(Op0->getType() == Op1->getType()); + Type *Ty = I.getType(); // If the shift amount is a one-use `sext`, we can demote it to `zext`. Value *Y; if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) { - Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName()); + Value *NewExt = Builder.CreateZExt(Y, Ty, Op1->getName()); return BinaryOperator::Create(I.getOpcode(), Op0, NewExt); } @@ -409,6 +410,57 @@ return BinaryOperator::Create(I.getOpcode(), NewC, A); } + unsigned BitWidth = Ty->getScalarSizeInBits(); + + const APInt *AC, *AddC; + // Try to pre-shift a constant shifted by a variable amount added with a + // negative number: + // C << (X - AddC) --> (C >> AddC) << X + // and + // C >> (X - AddC) --> (C << AddC) >> X + if (match(Op0, m_APInt(AC)) && match(Op1, m_Add(m_Value(A), m_APInt(AddC))) && + AddC->isNegative() && (-*AddC).ult(BitWidth)) { + assert(!AC->isZero() && "Expected simplify of shifted zero"); + unsigned PosOffset = (-*AddC).getZExtValue(); + + auto isSuitableForPreShift = [PosOffset, &I, + AC](Instruction::BinaryOps BinOpcode) { + switch (BinOpcode) { + default: + return false; + case Instruction::Shl: + return (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) && + AC->eq(AC->lshr(PosOffset).shl(PosOffset)); + case Instruction::LShr: + return I.isExact() && AC->eq(AC->shl(PosOffset).lshr(PosOffset)); + case Instruction::AShr: + return I.isExact() && AC->eq(AC->shl(PosOffset).ashr(PosOffset)); + } + }; + auto applyFlag = [&I](BinaryOperator *NewShiftOp) { + switch (NewShiftOp->getOpcode()) { + default: + llvm_unreachable("Inconsistency within commonShiftTransforms"); + case Instruction::Shl: + NewShiftOp->setHasNoSignedWrap(false); + NewShiftOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); + return; + case Instruction::LShr: + case Instruction::AShr: + NewShiftOp->setIsExact(); + } + }; + if (isSuitableForPreShift(I.getOpcode())) { + Constant *NewC = ConstantInt::get(Ty, I.getOpcode() == Instruction::Shl + ? AC->lshr(PosOffset) + : AC->shl(PosOffset)); + BinaryOperator *NewShiftOp = + BinaryOperator::Create(I.getOpcode(), NewC, A); + applyFlag(NewShiftOp); + return NewShiftOp; + } + } + // X shift (A srem C) -> X shift (A and (C - 1)) iff C is a power of 2. // Because shifts by negative values (which could occur if A were negative) // are undefined. @@ -416,7 +468,7 @@ match(C, m_Power2())) { // FIXME: Should this get moved into SimplifyDemandedBits by saying we don't // demand the sign bit (and many others) here?? - Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(I.getType(), 1)); + Constant *Mask = ConstantExpr::getSub(C, ConstantInt::get(Ty, 1)); Value *Rem = Builder.CreateAnd(A, Mask, Op1->getName()); return replaceOperand(I, 1, Rem); } @@ -988,23 +1040,6 @@ return BinaryOperator::CreateLShr( ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X); - // Try to pre-shift a constant shifted by a variable amount: - // C << (X + AddC) --> (C >> -AddC) << X - // This requires a no-wrap flag and negative offset constant. - const APInt *AddC; - if ((I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) && - match(Op0, m_APInt(C)) && match(Op1, m_Add(m_Value(X), m_APInt(AddC))) && - AddC->isNegative() && (-*AddC).ult(BitWidth)) { - assert(!C->isZero() && "Expected simplify of shifted zero"); - unsigned PosOffset = (-*AddC).getZExtValue(); - if (C->eq(C->lshr(PosOffset).shl(PosOffset))) { - Constant *NewC = ConstantInt::get(Ty, C->lshr(PosOffset)); - Instruction *NewShl = BinaryOperator::CreateShl(NewC, X); - NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); - return NewShl; - } - } - return nullptr; } Index: llvm/test/Transforms/InstCombine/shift-add.ll =================================================================== --- llvm/test/Transforms/InstCombine/shift-add.ll +++ llvm/test/Transforms/InstCombine/shift-add.ll @@ -157,6 +157,97 @@ ret i32 %r } +; Negative offset precondition check for lshr_exact_add_negative_shift_positive +define i32 @lshr_exact_add_positive_shift_positive(i32 %x) { +; CHECK-LABEL: @lshr_exact_add_positive_shift_positive( +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], 1 +; CHECK-NEXT: [[R:%.*]] = lshr exact i32 2, [[A:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %x, 1 + %r = lshr exact i32 2, %a + ret i32 %r +} + +; Positive constant precondition check for lshr_exact_add_negative_shift_positive +define i32 @lshr_exact_add_negative_shift_negative(i32 %x) { +; CHECK-LABEL: @lshr_exact_add_negative_shift_negative( +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = lshr exact i32 -2, [[A:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %x, -1 + %r = lshr exact i32 -2, %a + ret i32 %r +} + +; Exact precondition check for lshr_exact_add_negative_shift_positive +define i32 @lshr_add_negative_shift_no_exact(i32 %x) { +; CHECK-LABEL: @lshr_add_negative_shift_no_exact( +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = lshr i32 2, [[A:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %x, -1 + %r = lshr i32 2, %a + ret i32 %r +} + +define i32 @lshr_exact_add_negative_shift_positive(i32 %x) { +; CHECK-LABEL: @lshr_exact_add_negative_shift_positive( +; CHECK-NEXT: [[R:%.*]] = lshr exact i32 4, [[X:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %x, -1 + %r = lshr exact i32 2, %a + ret i32 %r +} + +; Trailing zeros precondition check for ashr_exact_add_negative_shift_[positive,negative] +define i8 @ashr_exact_add_negative_shift_no_trailing_zeros(i8 %x) { +; CHECK-LABEL: @ashr_exact_add_negative_shift_no_trailing_zeros( +; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], -4 +; CHECK-NEXT: [[R:%.*]] = ashr exact i8 -112, [[A:%.*]] +; CHECK-NEXT: ret i8 [[R]] +; + %a = add i8 %x, -4 + %r = ashr exact i8 -112, %a ; 0b1001_0000 + ret i8 %r +} + +; Exact precondition check for ashr_exact_add_negative_shift_[positive,negative] +define i32 @ashr_add_negative_shift_no_exact(i32 %x) { +; CHECK-LABEL: @ashr_add_negative_shift_no_exact( +; CHECK-NEXT: [[A:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[R:%.*]] = ashr i32 -2, [[A:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %x, -1 + %r = ashr i32 -2, %a + ret i32 %r +} + +define i32 @ashr_exact_add_negative_shift_negative(i32 %x) { +; CHECK-LABEL: @ashr_exact_add_negative_shift_negative( +; CHECK-NEXT: [[R:%.*]] = ashr exact i32 -4, [[X:%.*]] +; CHECK-NEXT: ret i32 [[R]] +; + %a = add i32 %x, -1 + %r = ashr exact i32 -2, %a + ret i32 %r +} + +define i16 @ashr_exact_add_negative_shift_positive(i16 %x) { +; CHECK-LABEL: @ashr_exact_add_negative_shift_positive( +; CHECK-NEXT: [[R:%.*]] = lshr i16 4, [[X:%.*]] +; CHECK-NEXT: ret i16 [[R]] +; + %a = add i16 %x, -1 + %r = ashr exact i16 2, %a + ret i16 %r +} + + ; negative test - must have 'nuw' define i32 @shl_add_nsw(i32 %x) {