Changeset View
Standalone View
llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
Show First 20 Lines • Show All 833 Lines • ▼ Show 20 Lines | if (!isa<Constant>(TrueVal) && FBO->getOperand(0) == TrueVal && | ||||||||||
Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS); | Value *NewOp = Builder.CreateBinOp(FBO->getOpcode(), NewShift, NewRHS); | ||||||||||
return SelectInst::Create(Cond, NewShift, NewOp); | return SelectInst::Create(Cond, NewShift, NewOp); | ||||||||||
} | } | ||||||||||
} | } | ||||||||||
return nullptr; | return nullptr; | ||||||||||
} | } | ||||||||||
// Tries to perform | |||||||||||
// (lshr (add (zext X), (zext Y)), K) | |||||||||||
// -> (icmp ult (add X, Y), X) | |||||||||||
// where | |||||||||||
// - The add's operands are zexts from a K-bits integer to a bigger type. | |||||||||||
// - The add is only used by the shr, or by iK (or narrower) truncates. | |||||||||||
// - The lshr type has more than 2 bits (other types are boolean math). | |||||||||||
lebedev.riUnsubmitted Done ReplyInline Actions
lebedev.ri: | |||||||||||
foadUnsubmitted Not Done ReplyInline ActionsLikewise I don't think you need this line and the "boolean math" comment belongs on the "K > 1" line. foad: Likewise I don't think you need this line and the "boolean math" comment belongs on the "K > 1"… | |||||||||||
Pierre-vhAuthorUnsubmitted Will address in D141129 Pierre-vh: Will address in D141129 | |||||||||||
// - K > 1 | |||||||||||
// note that | |||||||||||
// - The resulting add cannot have nuw/nsw, else on overflow we get a | |||||||||||
// poison value and the transform isn't legal anymore. | |||||||||||
Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) { | |||||||||||
I think it would be nicer to hoist the match and rename the variable. Value *Add = I.getOperand(0); Value *X = nullptr, *Y = nullptr; if (!match(Add, m_Add(m_Value(X), m_Value(Y)))) return nullptr; lebedev.ri: I think it would be nicer to hoist the match and rename the variable.
```
Value *Add = I. | |||||||||||
assert(I.getOpcode() == Instruction::LShr); | |||||||||||
Value *Add = I.getOperand(0); | |||||||||||
Value *ShiftAmt = I.getOperand(1); | |||||||||||
Type *Ty = I.getType(); | |||||||||||
if (Ty->getScalarSizeInBits() < 3) | |||||||||||
foadUnsubmitted I don't think you need this check since you check below that K != 1 and it matches the width of X and Y, and the shift type must be strictly wider than that. But I guess it's harmless. foad: I don't think you need this check since you check below that K != 1 and it matches the width of… | |||||||||||
return nullptr; | |||||||||||
const APInt *ShAmtAPInt = nullptr; | |||||||||||
Value *X = nullptr, *Y = nullptr; | |||||||||||
Is it not sufficient to just check that the width is 2x the shift amount? lebedev.ri: Is it not sufficient to just check that the width is 2x the shift amount?
Do we need the power… | |||||||||||
I don't think the power of 2 check is really needed - I inherited it from the original patch. I will remove it Pierre-vh: I don't think the power of 2 check is really needed - I inherited it from the original patch. I… | |||||||||||
if (!match(ShiftAmt, m_APInt(ShAmtAPInt)) || | |||||||||||
!match(Add, | |||||||||||
We should put m_OneUse() limits on the m_ZExt matches. Also, add a test where the zexts have extra use(s). That should avoid the +2 instruction count regression in the next patch. We are still potentially increasing instruction count with this transform, but the trade-off seems more reasonable if we can eliminate more of the intermediate instructions. spatel: We should put m_OneUse() limits on the m_ZExt matches. Also, add a test where the zexts have… | |||||||||||
m_Add(m_OneUse(m_ZExt(m_Value(X))), m_OneUse(m_ZExt(m_Value(Y)))))) | |||||||||||
return nullptr; | |||||||||||
const unsigned ShAmt = ShAmtAPInt->getZExtValue(); | |||||||||||
if (ShAmt == 1) | |||||||||||
What we want to know here, is whether both X and Y can be NUW-truncated to ShAmt-wide types. /// Get the upper bound on bit size for this Value \p Op as a signed integer. /// i.e. x == sext(trunc(x to MaxSignificantBits) to bitwidth(x)). /// Similar to the APInt::getSignificantBits function. unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr); but that is for sign bits, while we want zero bits. lebedev.ri: What we want to know here, is whether both `X` and `Y` can be `NUW`-truncated to `ShAmt`-wide… | |||||||||||
return nullptr; | |||||||||||
// X/Y are zexts from `ShAmt`-sized ints. | |||||||||||
if (X->getType()->getScalarSizeInBits() != ShAmt || | |||||||||||
Y->getType()->getScalarSizeInBits() != ShAmt) | |||||||||||
return nullptr; | |||||||||||
// Make sure that `Add` is only used by `I` and `ShAmt`-truncates. | |||||||||||
if (!Add->hasOneUse()) { | |||||||||||
Is there test coverage when the shift amt isn't the half of the original width? lebedev.ri: Is there test coverage when the shift amt isn't the half of the original width? | |||||||||||
lshr_16_to_64_add_zext_basic? Pierre-vh: lshr_16_to_64_add_zext_basic?
I also added a couple of test cases where the shift amount is… | |||||||||||
for (User *U : Add->users()) { | |||||||||||
if (U == &I) | |||||||||||
continue; | |||||||||||
TruncInst *Trunc = dyn_cast<TruncInst>(U); | |||||||||||
if (!Trunc || Trunc->getType()->getScalarSizeInBits() > ShAmt) | |||||||||||
return nullptr; | |||||||||||
} | |||||||||||
} | |||||||||||
// Insert at Add so that the newly created `NarrowAdd` will dominate it's | |||||||||||
This is rather unusual in InstCombine. lebedev.ri: This is rather unusual in InstCombine.
Would it not be sufficient to just zext the result? | |||||||||||
I think the original intent was to only to do the transform when it appears beneficial, but if we're fine with doing it all the time as a canonical form then we can indeed just zext + RAUW. What do you think? Pierre-vh: I think the original intent was to only to do the transform when it appears beneficial, but if… | |||||||||||
Now that I think about it again, I don't think the transform would be valid if we don't look for the truncs? Pierre-vh: Now that I think about it again, I don't think the transform would be valid if we don't look… | |||||||||||
Ah yes, you look at truncs of the wide add. I think what you want here, is: if(!Op0.hasOneUse()) { for (User *Usr : Op0->users()) { if (Usr == &I) continue; TruncInst *Trunc = dyn_cast<TruncInst>(Usr); if (!Trunc || Trunc->getType()->getScalarSizeInBits() > HalfWidth) return nullptr; } } and then just if(!Op0.hasOneUse()) { Value*WideOV = Builder.CreateZExt(Overflow); replaceInstUsesWith(Op0, WideOV); } lebedev.ri: Ah yes, you look at truncs of the wide add.
It wouldn't be illegal to just leave it be.
I… | |||||||||||
Indeed that works too, but I think you meant CreateZExt(UAdd) instead of the Overflow bit? Pierre-vh: Indeed that works too, but I think you meant `CreateZExt(UAdd)` instead of the Overflow bit? | |||||||||||
// users (i.e. `Add`'s users). | |||||||||||
Instruction *AddInst = cast<Instruction>(Add); | |||||||||||
Builder.SetInsertPoint(AddInst); | |||||||||||
Value *NarrowAdd = Builder.CreateAdd(X, Y, "add.narrowed"); | |||||||||||
Value *Overflow = | |||||||||||
Builder.CreateICmpULT(NarrowAdd, X, "add.narrowed.overflow"); | |||||||||||
// Replace the uses of the original add with a zext of the | |||||||||||
// NarrowAdd's result. Note that all users at this stage are known to | |||||||||||
// be ShAmt-sized truncs, or the lshr itself. | |||||||||||
if (!Add->hasOneUse()) | |||||||||||
replaceInstUsesWith(*AddInst, Builder.CreateZExt(NarrowAdd, Ty)); | |||||||||||
alive2 proof please? lebedev.ri: alive2 proof please? | |||||||||||
At this stage, the add is only used by either ShAmt-sized truncs, or the shift. Pierre-vh: At this stage, the add is only used by either ShAmt-sized truncs, or the shift.
We're removing… | |||||||||||
Not Done ReplyInline ActionsRight, i'm being slow. This is correct. lebedev.ri: Right, i'm being slow. This is correct. | |||||||||||
// Replace the LShr with a zext of the overflow check. | |||||||||||
return new ZExtInst(Overflow, Ty); | |||||||||||
} | |||||||||||
I think we should just emit it all next to the original add, lebedev.ri: I think we should just emit it all next to the original `add`,
there isn't much point in… | |||||||||||
Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { | Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) { | ||||||||||
const SimplifyQuery Q = SQ.getWithInstruction(&I); | const SimplifyQuery Q = SQ.getWithInstruction(&I); | ||||||||||
Right, not an overflow bit. Probably adjust the name too. lebedev.ri: Right, not an overflow bit. Probably adjust the name too. | |||||||||||
if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1), | if (Value *V = simplifyShlInst(I.getOperand(0), I.getOperand(1), | ||||||||||
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q)) | I.hasNoSignedWrap(), I.hasNoUnsignedWrap(), Q)) | ||||||||||
return replaceInstUsesWith(I, V); | return replaceInstUsesWith(I, V); | ||||||||||
This is incorrect in the i2/i1 case: https://alive2.llvm.org/ce/z/ga8CDx Using sext if the original shift was ashr will fix it. foad: This is incorrect in the i2/i1 case: https://alive2.llvm.org/ce/z/ga8CDx
Using sext if the… | |||||||||||
if (Instruction *X = foldVectorBinop(I)) | if (Instruction *X = foldVectorBinop(I)) | ||||||||||
return X; | return X; | ||||||||||
mul i16 255, 2 is i16 510, which is non-negative, lebedev.ri: `mul i16 255, 2` is `i16 510`, which is non-negative,
so the signbit of the original result is… | |||||||||||
Is it normal that Alive2 reports the transformation as incorrect for i2 then (see @foad's comment above) ? Pierre-vh: Is it normal that Alive2 reports the transformation as incorrect for i2 then (see @foad's… | |||||||||||
Ok, good point, please add that as a comment. lebedev.ri: Ok, good point, please add that as a comment. | |||||||||||
if (Instruction *V = commonShiftTransforms(I)) | if (Instruction *V = commonShiftTransforms(I)) | ||||||||||
return V; | return V; | ||||||||||
if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder)) | if (Instruction *V = dropRedundantMaskingOfLeftShiftInput(&I, Q, Builder)) | ||||||||||
return V; | return V; | ||||||||||
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); | ||||||||||
▲ Show 20 Lines • Show All 469 Lines • ▼ Show 20 Lines | Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) { | ||||||||||
// Transform (x << y) >> y to x & (-1 >> y) | // Transform (x << y) >> y to x & (-1 >> y) | ||||||||||
if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) { | if (match(Op0, m_OneUse(m_Shl(m_Value(X), m_Specific(Op1))))) { | ||||||||||
Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); | Constant *AllOnes = ConstantInt::getAllOnesValue(Ty); | ||||||||||
Value *Mask = Builder.CreateLShr(AllOnes, Op1); | Value *Mask = Builder.CreateLShr(AllOnes, Op1); | ||||||||||
return BinaryOperator::CreateAnd(Mask, X); | return BinaryOperator::CreateAnd(Mask, X); | ||||||||||
} | } | ||||||||||
if (Instruction *Overflow = foldLShrOverflowBit(I)) | |||||||||||
return Overflow; | |||||||||||
return nullptr; | return nullptr; | ||||||||||
} | } | ||||||||||
Instruction * | Instruction * | ||||||||||
InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract( | InstCombinerImpl::foldVariableSignZeroExtensionOfVariableHighBitExtract( | ||||||||||
BinaryOperator &OldAShr) { | BinaryOperator &OldAShr) { | ||||||||||
assert(OldAShr.getOpcode() == Instruction::AShr && | assert(OldAShr.getOpcode() == Instruction::AShr && | ||||||||||
"Must be called with arithmetic right-shift instruction only."); | "Must be called with arithmetic right-shift instruction only."); | ||||||||||
▲ Show 20 Lines • Show All 185 Lines • Show Last 20 Lines |