Changeset View
Standalone View
lib/CodeGen/SelectionDAG/TargetLowering.cpp
Show First 20 Lines • Show All 2,788 Lines • ▼ Show 20 Lines | if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl)) | if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl)) | ||||
return V; | return V; | ||||
} | } | ||||
// Fold remainder of division by a a constant | |||||
if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && | |||||
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) { | |||||
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); | |||||
// When division is cheap or optimizing for minimum size, | |||||
lebedev.riUnsubmitted Done ReplyInline Actionslebedev.ri: ```
if (SDValue Folded = BuildREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
``` | |||||
// fall through to DIVREM creation by skipping this fold. | |||||
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) | |||||
if (SDValue Folded = BuildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) | |||||
return Folded; | |||||
} | |||||
// Fold away ALL boolean setcc's. | // Fold away ALL boolean setcc's. | ||||
SDValue Temp; | SDValue Temp; | ||||
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { | if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { | ||||
EVT OpVT = N0.getValueType(); | EVT OpVT = N0.getValueType(); | ||||
switch (Cond) { | switch (Cond) { | ||||
default: llvm_unreachable("Unknown integer setcc!"); | default: llvm_unreachable("Unknown integer setcc!"); | ||||
case ISD::SETEQ: // X == Y -> ~(X^Y) | case ISD::SETEQ: // X == Y -> ~(X^Y) | ||||
Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); | Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); | ||||
▲ Show 20 Lines • Show All 953 Lines • ▼ Show 20 Lines | SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, | ||||
Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); | Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); | ||||
Created.push_back(Q.getNode()); | Created.push_back(Q.getNode()); | ||||
SDValue One = DAG.getConstant(1, dl, VT); | SDValue One = DAG.getConstant(1, dl, VT); | ||||
SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); | SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); | ||||
return DAG.getSelect(dl, VT, IsOne, N0, Q); | return DAG.getSelect(dl, VT, IsOne, N0, Q); | ||||
} | } | ||||
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE | |||||
/// where the divisor is constant and the comparison target is zero, | |||||
/// return a DAG expression that will generate the same comparison result | |||||
/// using only multiplications, additions and shifts/rotations. | |||||
/// Ref: "Hacker's Delight" 10-17. | |||||
SDValue TargetLowering::BuildUREMEqFold(EVT VT, SDValue REMNode, | |||||
SDValue CompNode, ISD::CondCode Cond, | |||||
DAGCombinerInfo &DCI, | |||||
This should be SmallVectorImpl<SDNode *> &Created like in all the other cases. lebedev.ri: This should be `SmallVectorImpl<SDNode *> &Created` like in all the other cases. | |||||
Not Done ReplyInline ActionsIsn't this still missing? RKSimon: Isn't this still missing? | |||||
Not Done ReplyInline ActionsThe approach is different now: BuildUREMEqFold is now called from TargetLowering::SimplifySetCC and not from DAGCombiners::visitREM, and l tried to match the style of the other helpers called from there (like simplifySetCCWithAnd). hermord: The approach is different now: `BuildUREMEqFold` is now called from `TargetLowering… | |||||
Not Done ReplyInline ActionsLooking at the code on which this was originally based, i think i was slightly wrong. See TargetLowering::BuildSDIVPow2() (which is the implementation), vs DAGCombiner::BuildSDIVPow2() (which is the wrapper). lebedev.ri: Looking at the code on which this was originally based, i think i was slightly wrong.
This… | |||||
Not Done ReplyInline ActionsAh sorry, just to clarify, there used to be a DAGCombiner::BuildREMEqFold in an earlier version of this patch, but my rationale for removing it was that:
If it's better to mimic BuildSDIVPow2 and have a wrapper, I could:
hermord: Ah sorry, just to clarify, there used to be a `DAGCombiner::BuildREMEqFold` in an earlier… | |||||
Not Done ReplyInline ActionsThis current BuildUREMEqFold() does not adds nodes to the worklist directly as far as i can see? Well, i had the third variant in mind.
So nothing changes for the callers. lebedev.ri: This current `BuildUREMEqFold()` does not `adds nodes to the worklist directly` as far as i can… | |||||
Not Done ReplyInline ActionsIt doesn't right now, this is true. I removed it following this comment: Comment at: lib/CodeGen/SelectionDAG/TargetLowering.cpp:3771 + SDValue Op1 = DAG.getNode(ISD::MUL, DL, REMVT, REMNode->getOperand(0), PVal); + DCI.AddToWorklist(Op1.getNode()); + // Will change in the signed case ---------------- I do not know if this is needed? At least rL338079 removed such a call. Looking back at rL338079, it doesn't say that AddToWorklist is never necessary in this context, and similar code (BuildUDIV) does do it, so I removing it might have been premature. I took a stab at the third variant in the latest diff. hermord: It doesn't right now, this is true. I removed it following this comment:
```
Comment at… | |||||
const SDLoc &DL) const { | |||||
SmallVector<SDNode *, 2> Built; | |||||
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for [in]equality comparisons."); lebedev.ri: ```
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for… | |||||
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) (it would be great to use more descriptive names to differentiate between Constants and not constants, too) lebedev.ri: ```
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
```
(it would be… | |||||
But thinking about it a bit more, probably don't rename the variables. Consistency [with the orig doc] may be best. lebedev.ri: > (it would be great to use more descriptive names to differentiate between Constants and not… | |||||
The comment change wasn't applied, i'm not aware of gte predicate, but i do know [u/s]ge. lebedev.ri: The comment change wasn't applied, i'm not aware of `gte` predicate, but i do know `[u/s]ge`. | |||||
if (SDValue Folded = | |||||
PrepareUREMEqFold(VT, REMNode, CompNode, Cond, DCI, DL, Built)) { | |||||
The produced patterns are rather different between unsigned and signed cases. lebedev.ri: The produced patterns are rather different between unsigned and signed cases.
I think this… | |||||
for (SDNode *N : Built) | |||||
DCI.AddToWorklist(N); | |||||
return Folded; | |||||
You can move this closer to where it's actually used. lebedev.ri: You can move this closer to where it's actually used. | |||||
} | |||||
return SDValue(); | |||||
} | |||||
SDValue | |||||
TargetLowering::PrepareUREMEqFold(EVT VT, SDValue REMNode, | |||||
SDValue CompNode, ISD::CondCode Cond, | |||||
DAGCombinerInfo &DCI, const SDLoc &DL, | |||||
SmallVectorImpl<SDNode *> &Created) const { | |||||
Not Done ReplyInline ActionsIt should be trivial for you to add non-uniform vector support in this patch, following the patterns I did for the other divsion-by-constant builders. RKSimon: It should be trivial for you to add non-uniform vector support in this patch, following the… | |||||
Not Done ReplyInline ActionsI've got conflicting directions for this patch (add nonsplat here or submit it separately), so I didn't touch that part in this update. hermord: I've got conflicting directions for this patch (add nonsplat here or submit it separately), so… | |||||
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) | |||||
// - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 | |||||
// - P is the multiplicative inverse of D0 modulo 2^W | |||||
// - Q = floor((2^W - 1) / D0) | |||||
// where W is the width of the common type of N and D | |||||
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && | |||||
"Only applicable for (in)equality comparisons."); | |||||
// Decompose D into D0 * 2^K lebedev.ri: ```
// Decompose D into D0 * 2^K
``` | |||||
Not Done ReplyInline ActionsYou don't actually modify D as far as i can tell? lebedev.ri: You don't actually modify `D` as far as i can tell?
I think it should be `const APInt &D` | |||||
Separate with newline. lebedev.ri: Separate with newline. | |||||
bool DivisorIsEven = K != 0; lebedev.ri: ```
bool DivisorIsEven = K != 0;
``` | |||||
EVT REMVT = REMNode->getValueType(0); | |||||
APInt::lshr returns a value not a reference. RKSimon: APInt::lshr returns a value not a reference. | |||||
if (!isTypeLegal(REMVT)) | |||||
Not Done ReplyInline ActionsAdd self-explanatory assertion? lebedev.ri: Add self-explanatory assertion? | |||||
Not Done ReplyInline ActionsSorry, I couldn't figure out what the assertion should be. The negative case can still be handled just fine; do you mean that I should assert (D0 =/= 1) and then make the caller check for that? hermord: Sorry, I couldn't figure out what the assertion should be. The negative case can still be… | |||||
return SDValue(); | |||||
Not Done ReplyInline ActionsIs this supposed to be ashr even if we are doing UREM? Seems a little unusual but if this is correct, it probably earns a comment. majnemer: Is this supposed to be ashr even if we are doing UREM? Seems a little unusual but if this is… | |||||
// If MUL is unavailable, we cannot proceed in any case | |||||
Not Done ReplyInline ActionsSo what we are really checking here, is that D is not power of two. APInt D = Divisor->getAPIntValue(); unsigned W = D.getBitWidth(); // If D is power of two, D0 would be 1, and we cannot build this fold. if(D.isPowerOf2()) return SDValue(); // Decompose D into D0 * 2^K ... BUT. I do not understand from where does this requirement comes from? lebedev.ri: So what we are really checking here, is that `D` is not power of two.
How about simplifying… | |||||
Not Done ReplyInline ActionsThe algorithm also fails in general in this case because the inverse of 1 is 1 itself, and we get, for example: Given: N: 1, D: 4, C: 0 Then: K = 2 D0 = 1 P = 1 Q = (2^32 - 1)/1 = 2^32 - 1 And so: (P >>rot 2) == 2^30 < 2^32 - 1 == Q, so the condition holds, but N % D != 0 The book mentions this in passing: "This can be simplified a little by observing that because d is odd and, as we are assuming, positive and not equal to 1 [...]" (p. 227). hermord: The algorithm also fails in general in this case because the inverse of 1 is 1 itself, and we… | |||||
Well, i see it now - https://rise4fun.com/Alive/aXUu. APInt D = Divisor->getAPIntValue(); if(D.isPowerOf2()) { // rem by power-of-two is better represented by and-mask. return SDValue(); } // Decompose D into D0 * 2^K ... lebedev.ri: Well, i see it now - https://rise4fun.com/Alive/aXUu.
Then this should be:
```
APInt D =… | |||||
if (!(DCI.isBeforeLegalizeOps() ? isOperationLegalOrCustom(ISD::MUL, REMVT) | |||||
: isOperationLegal(ISD::MUL, REMVT))) | |||||
return SDValue(); | |||||
Not Done ReplyInline ActionsThe sign bit may be set for APInts fed into URem operations: all isNegative does is check that the MSB is set. I don't think it is OK to branch on the sign bit here without considering if we are in dealing with URem or SRem. I think what you want is: if (IsSigned) { D0 = D.ashr(K); } else { D0 = D.lshr(K); } majnemer: The sign bit may be set for APInts fed into URem operations: all isNegative does is check that… | |||||
Not Done ReplyInline ActionsOk, thank you for the explanation regarding D0 != 1. APInt D0 = D.lshr(K); assert(!D0.isOneValue() && "The fold is invalid for D0 of 1. Unreachable since we leave powers-of-two to be improved elsewhere."); lebedev.ri: Ok, thank you for the explanation regarding `D0 != 1`.
I think here now we can just add an… | |||||
Not Done ReplyInline ActionsThis turns out to actually be reachable: the logic that handles REM by powers of two is in visitREM, which is invoked after visitSetCC from where we would've come here. I kept this as an early return if that's OK. hermord: This turns out to actually be reachable: the logic that handles REM by powers of two is in… | |||||
Sounds good. lebedev.ri: Sounds good. | |||||
// TODO: Add non-uniform constant support | |||||
ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); | |||||
Would [[ http://llvm.org/doxygen/classllvm_1_1APInt.html#a28ee15b0286415cce5599d4fb9f9ce02 | APInt::multiplicativeInverse() ]] work here? lebedev.ri: Would [[ http://llvm.org/doxygen/classllvm_1_1APInt.html#a28ee15b0286415cce5599d4fb9f9ce02 |… | |||||
ConstantSDNode *CompTarget = isConstOrConstSplat(CompNode); | |||||
if (!Divisor || !CompTarget || Divisor->isNullValue() || | |||||
!CompTarget->isNullValue()) | |||||
This is begging to be wrong. Can you create two variables on different lines? lebedev.ri: This is begging to be wrong. Can you create two variables on different lines? | |||||
Also, move the W here, to the use. lebedev.ri: Also, move the `W` here, to the use. | |||||
return SDValue(); | |||||
I would think just writing this as one line would be as clean APInt Q = APInt::getAllOnesValue(W).udiv(D0); lebedev.ri: I would think just writing this as one line would be as clean
```
APInt Q = APInt… | |||||
const APInt &D = Divisor->getAPIntValue(); | |||||
// Decompose D into D0 * 2^K | |||||
unsigned K = D.countTrailingZeros(); | |||||
bool DivisorIsEven = (K != 0); | |||||
I do not know if this is needed? lebedev.ri: I do not know if this is needed?
At least rL338079 removed such a call. | |||||
APInt::zext returns a value not a reference. RKSimon: APInt::zext returns a value not a reference. | |||||
APInt D0 = D.lshr(K); | |||||
// The fold is invalid when D0 == 1. | |||||
Drop the comment about signed cases? lebedev.ri: Drop the comment about signed cases?
I strongly believe that should be done in a new function. | |||||
// This is reachable because visitSetCC happens before visitREM. | |||||
if (D0.isOneValue()) | |||||
if(DivisorIsEven) { lebedev.ri: ```
if(DivisorIsEven) {
``` | |||||
Again, value not a reference RKSimon: Again, value not a reference | |||||
return SDValue(); | |||||
// P = inv(D0, 2^W) | |||||
// 2^W requires W + 1 bits, so we have to extend and then truncate | |||||
unsigned W = D.getBitWidth(); | |||||
APInt P = D0.zext(W + 1) | |||||
.multiplicativeInverse(APInt::getHighBitsSet(W + 1, 1)) | |||||
You must ensure that MUL + ROTR are legal when necessary - pass a IsAfterLegalization flag into the function and check with isOperationLegalOrCustom/isOperationLegal - see TargetLowering::BuildSDIV RKSimon: You must ensure that MUL + ROTR are legal when necessary - pass a IsAfterLegalization flag into… | |||||
.trunc(W); | |||||
Drop the comment about signed cases? lebedev.ri: Drop the comment about signed cases?
I strongly believe that should be done in a new function. | |||||
// Q = floor((2^W - 1) / D0) | |||||
APInt Q = APInt::getAllOnesValue(W).udiv(D0); | |||||
SelectionDAG &DAG = DCI.DAG; | |||||
SDValue PVal = DAG.getConstant(P, DL, REMVT); | |||||
SDValue QVal = DAG.getConstant(Q, DL, REMVT); | |||||
// (mul N, P) | |||||
SDValue Op1 = DAG.getNode(ISD::MUL, DL, REMVT, REMNode->getOperand(0), PVal); | |||||
Created.push_back(Op1.getNode()); | |||||
// Rotate right only if D was even | |||||
if (DivisorIsEven) { | |||||
// We need ROTR to do this | |||||
if (!(DCI.isBeforeLegalizeOps() ? isOperationLegalOrCustom(ISD::ROTR, REMVT) | |||||
: isOperationLegal(ISD::ROTR, REMVT))) | |||||
return SDValue(); | |||||
SDValue ShAmt = | |||||
DAG.getConstant(K, DL, getShiftAmountTy(REMVT, DAG.getDataLayout())); | |||||
SDNodeFlags Flags; | |||||
Flags.setExact(true); | |||||
// UREM: (rotr (mul N, P), K) | |||||
Op1 = DAG.getNode(ISD::ROTR, DL, REMVT, Op1, ShAmt, Flags); | |||||
Created.push_back(Op1.getNode()); | |||||
} | |||||
// UREM: (setule/setugt (rotr (mul N, P), K), Q) | |||||
return DAG.getSetCC(DL, VT, Op1, QVal, | |||||
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); | |||||
} | |||||
bool TargetLowering:: | bool TargetLowering:: | ||||
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { | verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { | ||||
if (!isa<ConstantSDNode>(Op.getOperand(0))) { | if (!isa<ConstantSDNode>(Op.getOperand(0))) { | ||||
DAG.getContext()->emitError("argument to '__builtin_return_address' must " | DAG.getContext()->emitError("argument to '__builtin_return_address' must " | ||||
"be a constant integer"); | "be a constant integer"); | ||||
return true; | return true; | ||||
} | } | ||||
▲ Show 20 Lines • Show All 794 Lines • Show Last 20 Lines |