llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
… (836 lines not shown; context: #undef LCALLNAME5)

  setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
                       ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
                       ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
                       ISD::INSERT_SUBVECTOR, ISD::STORE});
  if (Subtarget->supportsAddressTopByteIgnored())
    setTargetDAGCombine(ISD::LOAD);

  setTargetDAGCombine(ISD::MSTORE);

  setTargetDAGCombine(ISD::MUL);

  setTargetDAGCombine({ISD::SELECT, ISD::VSELECT});

  setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
                       ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
                       ISD::VECREDUCE_ADD, ISD::STEP_VECTOR});

… (14,357 lines not shown; in performExtractSubvectorCombine)

  // NOTE: This combine exists in DAGCombiner, but that version's legality check
  // blocks this combine because the non-const case requires custom lowering.
  //
  // ty1 extract_vector(ty2 splat(const)) -> ty1 splat(const)
  if (V.getOpcode() == ISD::SPLAT_VECTOR)
    if (isa<ConstantSDNode>(V.getOperand(0)))
      return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V.getOperand(0));

  return SDValue();
}

paulwalker-arm: Is this universally true across all predicate patterns? Given my comment below I'm wondering if you'll eventually just create fresh PTRUEs rather than have to answer this question.
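
An aside on the rule being combined above: extracting any subvector of a constant splat is itself a splat of that constant at the narrower type. A minimal fixed-width model in plain C++ (a sketch for illustration only, not LLVM code; the 8-to-4 lane sizes and the extract index are arbitrary):

```cpp
#include <array>
#include <cassert>
#include <cstddef>

// Model: splat(c) over 8 lanes, then extract the 4-lane subvector starting at
// lane 4. The result is indistinguishable from splat(c) over 4 lanes, which is
// the whole equivalence the combine relies on.
int main() {
  constexpr int C = 42;
  std::array<int, 8> Splat8;
  Splat8.fill(C); // ty2 splat(const)

  std::array<int, 4> Extract;
  for (std::size_t I = 0; I < Extract.size(); ++I)
    Extract[I] = Splat8[4 + I]; // ty1 extract_vector(..., idx = 4)

  std::array<int, 4> Splat4;
  Splat4.fill(C); // ty1 splat(const)

  assert(Extract == Splat4);
  return 0;
}
```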

static SDValue
performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                              SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Vec = N->getOperand(0);
  SDValue SubVec = N->getOperand(1);
  uint64_t IdxVal = N->getConstantOperandVal(2);
  EVT VecVT = Vec.getValueType();

… (1,785 lines not shown; in performSpliceCombine)

  // splice(pg, op1, undef) -> op1
  if (N->getOperand(2).isUndef())
    return N->getOperand(1);

  return SDValue();
}

static SDValue performUnpackCombine(SDNode *N, SelectionDAG &DAG,
                                    const AArch64Subtarget *Subtarget) {
  assert((N->getOpcode() == AArch64ISD::UUNPKHI ||
          N->getOpcode() == AArch64ISD::UUNPKLO) &&
         "Unexpected Opcode!");

  // uunpklo/hi undef -> undef
  if (N->getOperand(0).isUndef())
    return DAG.getUNDEF(N->getValueType(0));

  // If this is a masked load followed by an UUNPKLO, fold this into a masked
  // extending load. We can do this even if this is already a masked
  // {z,}extload.

paulwalker-arm: This function also accepts `AArch64ISD::UUNPKHI` so you'll need to protect against that.

  if (N->getOperand(0).getOpcode() == ISD::MLOAD &&
      N->getOpcode() == AArch64ISD::UUNPKLO) {

paulwalker-arm: This is not necessary because `AArch64ISD::UUNPKLO` only supports integer types.

    MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N->getOperand(0));
    SDValue Mask = MLD->getMask();
    SDLoc DL(N);

    if (MLD->isUnindexed() && MLD->getExtensionType() != ISD::SEXTLOAD &&
        SDValue(MLD, 0).hasOneUse() && Mask->getOpcode() == AArch64ISD::PTRUE) {
      unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
      unsigned PgPattern = Mask->getConstantOperandVal(0);
      EVT InVT = N->getValueType(0);

paulwalker-arm: Is this not just VT? It's not related to any input from what I can see, or have I misunderstood the naming?

      // Ensure we can double the size of the predicate pattern
      if (PgPattern != AArch64SVEPredPattern::all &&
          getNumElementsFromSVEPredPattern(PgPattern) *
                  InVT.getVectorElementType().getSizeInBits() <=
              MinSVESize) {

paulwalker-arm: I think you've confused extending with expanding? I don't think this is something we support for AArch64, which explains why most code in this file just omits this parameter.

paulwalker-arm: `getNumElementsFromSVEPredPattern` can return 0 for cases other than `all`. Perhaps:

    unsigned NumElts = getNumElementsFromSVEPredPattern(PgPattern);
    if (NumElts && NumElts * InVT.getVectorElementType().getSizeInBits() <= MinSVESize)

        Mask =
            getPTrue(DAG, DL, InVT.changeVectorElementType(MVT::i1), PgPattern);
        SDValue NewLoad = DAG.getMaskedLoad(
            N->getValueType(0), DL, MLD->getChain(), MLD->getBasePtr(),
            MLD->getOffset(), Mask, MLD->getPassThru(), MLD->getMemoryVT(),
            MLD->getMemOperand(), MLD->getAddressingMode(), ISD::ZEXTLOAD);

paulwalker-arm: Is this correct? Given the new load has a different result type, I'd expect `PassThru` to also change. Speaking of which, I think the combine is only equivalent when the original PassThru is zero or undef and the new one is forced to zero?

        DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }

  return SDValue();
}
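
A note on the "double the size of the predicate pattern" check above: the fold replaces a load of N-bit elements with a zero-extending load of 2N-bit elements under the same PTRUE pattern, so the fixed lane count implied by the pattern must still fit in the minimum SVE register at the doubled width. Below is a standalone sketch of just that arithmetic, folding in the NumElts suggestion from the review; `numElementsFromPattern` and `canKeepPredicatePattern` are hypothetical stand-ins for illustration, not LLVM APIs:

```cpp
#include <cstdio>

// Hypothetical stand-in for getNumElementsFromSVEPredPattern: VL1..VL8-style
// patterns map to a fixed element count; anything else returns 0, meaning
// "no fixed count known".
static unsigned numElementsFromPattern(unsigned Pattern) {
  return (Pattern >= 1 && Pattern <= 8) ? Pattern : 0;
}

// The fold rewrites uunpklo(masked_load of N-bit elements) as a masked
// zero-extending load producing 2N-bit elements under the same PTRUE pattern.
// That is only safe if the fixed number of active lanes still fits in the
// minimum SVE register once each lane is twice as wide, i.e.
//   NumElts * WidenedEltBits <= MinSVESizeInBits.
static bool canKeepPredicatePattern(unsigned Pattern, unsigned WidenedEltBits,
                                    unsigned MinSVESizeInBits) {
  unsigned NumElts = numElementsFromPattern(Pattern);
  return NumElts != 0 && NumElts * WidenedEltBits <= MinSVESizeInBits;
}

int main() {
  // A VL4-style pattern, widening i16 -> i32 loads, 128-bit minimum vectors:
  // 4 * 32 = 128 <= 128, so the extending-load fold can keep the pattern.
  std::printf("VL4, i32, 128-bit min: %d\n", canKeepPredicatePattern(4, 32, 128));
  // A VL8-style pattern with i32 results would need 256 bits: rejected at 128.
  std::printf("VL8, i32, 128-bit min: %d\n", canKeepPredicatePattern(8, 32, 128));
  return 0;
}
```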

static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
  SDLoc DL(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT ResVT = N->getValueType(0);

… (317 lines not shown; context: if (Subtarget->supportsAddressTopByteIgnored() && …)

    return SDValue(N, 0);

  if (SDValue Store = foldTruncStoreOfExt(DAG, N))
    return Store;

  return SDValue();
}

static SDValue performMSTORECombine(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    SelectionDAG &DAG,
                                    const AArch64Subtarget *Subtarget) {
  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
  SDValue Value = MST->getValue();
  SDValue Mask = MST->getMask();
  SDLoc DL(N);

  // If this is a UZP1 followed by a masked store, fold this into a masked
  // truncating store. We can do this even if this is already a masked
  // truncstore.
  if (Value.getOpcode() == AArch64ISD::UZP1 && Value->hasOneUse() &&
      Value.getOperand(0) == Value.getOperand(1) && MST->isUnindexed() &&
      Mask->getOpcode() == AArch64ISD::PTRUE) {
    Value = Value.getOperand(0);

    if (Value.getOpcode() == ISD::BITCAST) {
      unsigned MinSVESize = Subtarget->getMinSVEVectorSizeInBits();
      unsigned PgPattern = Mask->getConstantOperandVal(0);

paulwalker-arm: Not quite sure but this feels too easy. What if you actually want to store the result of a UZP1? I think the current value of the predicate plays a key role here. The case you're trying to handle is when we're storing N elements where all those elements come from one side of a UZP1. Not sure if the loads have the same issue but I figure it's safest for the two to have symmetry.

      EVT InVT = Value.getOperand(0).getValueType();

paulwalker-arm: Whilst this works, you don't need to restrict the combine like this. Here you're checking the stored elements all come from UZP1's first operand. You could do this using:

    ValueVT.isInteger() && ValueVT != MVT::nxv2i64) {
      EVT HalfVT = ValueVT.getHalfNumVectorElementsVT(*DAG.getContext());
      EVT InVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());

followed by your current NumElts check. Then just before the getMaskedStore() create a fresh bitcast (e.g. Value = bitcast(Value.getOperand(0) to InVT)). This means you don't really care what UZP1's first operand is and you might catch more cases.

bsmith: I think for now this change should be left to a separate ticket, if we ever encounter this causing an issue. The logic around this is already quite confusing and I worry a change like that would make this section of code very confusing, whilst possibly not benefiting anything realistic.

      // Ensure we can double the size of the predicate pattern
      if (PgPattern != AArch64SVEPredPattern::all &&
          getNumElementsFromSVEPredPattern(PgPattern) *
                  InVT.getVectorElementType().getSizeInBits() <=
              MinSVESize) {
        Mask =
            getPTrue(DAG, DL, InVT.changeVectorElementType(MVT::i1), PgPattern);
        return DAG.getMaskedStore(MST->getChain(), DL, Value.getOperand(0),
                                  MST->getBasePtr(), MST->getOffset(), Mask,
                                  MST->getMemoryVT(), MST->getMemOperand(),
                                  MST->getAddressingMode(),
                                  /*IsTruncating=*/true);
      }
    }
  }

  return SDValue();
}
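
For context on why a masked store of `uzp1(x, x)`, where `x` is a bitcast from wider elements, can become a truncating masked store: on a little-endian target the even-indexed narrow lanes of such a value are exactly the element-wise truncations, and the fixed-count PTRUE checked above guarantees that only lanes drawn from the first operand are active, which is the point of the review comment about the predicate. A small standalone model for illustration, with arbitrary fixed sizes rather than scalable vectors:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // "Wide" value whose truncation we want to store (four 32-bit lanes).
  const uint32_t Wide[4] = {0x11112222u, 0x33334444u, 0x55556666u, 0x77778888u};

  // bitcast to eight 16-bit lanes (little-endian reinterpretation).
  uint16_t Narrow[8];
  std::memcpy(Narrow, Wide, sizeof(Wide));

  // uzp1(x, x): the first half of the result is the even-indexed lanes of x.
  // With a predicate that only activates these leading lanes, the stored data
  // is Narrow[0], Narrow[2], Narrow[4], Narrow[6].
  uint16_t Stored[4];
  for (int I = 0; I < 4; ++I)
    Stored[I] = Narrow[2 * I];

  // On little-endian this is precisely the low 16 bits of each wide lane,
  // i.e. what a truncating store of Wide would write.
  for (int I = 0; I < 4; ++I)
    std::printf("lane %d: uzp1 path 0x%04x, trunc path 0x%04x\n", I,
                static_cast<unsigned>(Stored[I]),
                static_cast<unsigned>(static_cast<uint16_t>(Wide[I])));
  return 0;
}
```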

/// \return true if part of the index was folded into the Base.
static bool foldIndexIntoBase(SDValue &BasePtr, SDValue &Index, SDValue Scale,
                              SDLoc DL, SelectionDAG &DAG) {
  // This function assumes a vector of i64 indices.
  EVT IndexVT = Index.getValueType();
  if (!IndexVT.isVector() || IndexVT.getVectorElementType() != MVT::i64)
    return false;

… (1,797 lines not shown; in AArch64TargetLowering::PerformDAGCombine)

  case ISD::SETCC:
    return performSETCCCombine(N, DAG);
  case ISD::LOAD:
    if (performTBISimplification(N->getOperand(1), DCI, DAG))
      return SDValue(N, 0);
    break;
  case ISD::STORE:
    return performSTORECombine(N, DCI, DAG, Subtarget);
  case ISD::MSTORE:
    return performMSTORECombine(N, DCI, DAG, Subtarget);
  case ISD::MGATHER:
  case ISD::MSCATTER:
    return performMaskedGatherScatterCombine(N, DCI, DAG);
  case ISD::VECTOR_SPLICE:
    return performSVESpliceCombine(N, DAG);
  case ISD::FP_EXTEND:
    return performFPExtendCombine(N, DAG, DCI, Subtarget);
  case AArch64ISD::BRCOND:
    return performBRCONDCombine(N, DCI, DAG);
  case AArch64ISD::TBNZ:
  case AArch64ISD::TBZ:
    return performTBZCombine(N, DCI, DAG);
  case AArch64ISD::CSEL:
    return performCSELCombine(N, DCI, DAG);
  case AArch64ISD::DUP:
    return performDUPCombine(N, DCI);
  case AArch64ISD::NVCAST:
    return performNVCASTCombine(N);
  case AArch64ISD::SPLICE:
    return performSpliceCombine(N, DAG);
  case AArch64ISD::UUNPKLO:
  case AArch64ISD::UUNPKHI:
    return performUnpackCombine(N, DAG, Subtarget);
  case AArch64ISD::UZP1:
    return performUzpCombine(N, DAG);
  case AArch64ISD::SETCC_MERGE_ZERO:
    return performSetccMergeZeroCombine(N, DCI);
  case AArch64ISD::GLD1_MERGE_ZERO:
  case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
  case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
  case AArch64ISD::GLD1_SXTW_MERGE_ZERO:

… (2,360 more lines not shown)