Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -2913,49 +2913,6 @@
       return;
     break;
 
-  case ISD::EXTRACT_VECTOR_ELT: {
-    // Extracting lane zero is a special case where we can just use a plain
-    // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
-    // the rest of the compiler, especially the register allocator and copyi
-    // propagation, to reason about, so is preferred when it's possible to
-    // use it.
-    ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
-    // Bail and use the default Select() for non-zero lanes.
-    if (LaneNode->getZExtValue() != 0)
-      break;
-    // If the element type is not the same as the result type, likewise
-    // bail and use the default Select(), as there's more to do than just
-    // a cross-class COPY. This catches extracts of i8 and i16 elements
-    // since they will need an explicit zext.
-    if (VT != Node->getOperand(0).getValueType().getVectorElementType())
-      break;
-    unsigned SubReg;
-    switch (Node->getOperand(0)
-                .getValueType()
-                .getVectorElementType()
-                .getSizeInBits()) {
-    default:
-      llvm_unreachable("Unexpected vector element type!");
-    case 64:
-      SubReg = AArch64::dsub;
-      break;
-    case 32:
-      SubReg = AArch64::ssub;
-      break;
-    case 16:
-      SubReg = AArch64::hsub;
-      break;
-    case 8:
-      llvm_unreachable("unexpected zext-requiring extract element!");
-    }
-    SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
-                                                     Node->getOperand(0));
-    LLVM_DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
-    LLVM_DEBUG(Extract->dumpr(CurDAG));
-    LLVM_DEBUG(dbgs() << "\n");
-    ReplaceNode(Node, Extract.getNode());
-    return;
-  }
   case ISD::Constant: {
     // Materialize zero constants as copies from WZR/XZR. This allows
     // the coalescer to propagate these into other instructions.
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6973,5 +6973,17 @@
 def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
 def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;
 
+// Extracting lane zero is a special case where we can just use a plain
+// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
+// rest of the compiler, especially the register allocator and copy propagation,
+// to reason about, so is preferred when it's possible to use it.
+// Only i64 and i32 results are handled here: extracts of i8 and i16 elements
+// yield a wider result and need an explicit zext, so are not matched here.
+let AddedComplexity = 10 in {
+  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
+  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
+  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
+}
+
 include "AArch64InstrAtomics.td"
 include "AArch64SVEInstrInfo.td"