Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -855,6 +855,7 @@
                           SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
                           SDValue ThisVal) const;
 
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
 
   SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1125,6 +1125,9 @@
     setOperationAction(ISD::VSCALE, MVT::i32, Custom);
 
     setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
+
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
   }
 
   if (Subtarget->hasSVE()) {
@@ -4472,6 +4475,38 @@
   return SDValue();
 }
 
+// Custom lowering for extending v4i8 vector loads.
+SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
+                                         SelectionDAG &DAG) const {
+  SDLoc Dl(Op);
+  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+  assert(LoadNode && "Expected custom lowering of a load node");
+
+  if (LoadNode->getMemoryVT() != MVT::v4i8)
+    return SDValue();
+
+  unsigned ExtType;
+  if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
+    ExtType = ISD::SIGN_EXTEND;
+  else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD)
+    ExtType = ISD::ZERO_EXTEND;
+  else
+    return SDValue();
+
+  // Load the four bytes as a single f32 on the original chain, then widen
+  // in two steps: v8i8 -> v8i16, and the low v4i16 half -> v4i32.
+  SDValue Load = DAG.getLoad(MVT::f32, Dl, LoadNode->getChain(),
+                             LoadNode->getBasePtr(), MachinePointerInfo());
+  SDValue Chain = Load.getValue(1);
+  SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f32, Load);
+  SDValue BC = DAG.getNode(ISD::BITCAST, Dl, MVT::v8i8, Vec);
+  SDValue Ext = DAG.getNode(ExtType, Dl, MVT::v8i16, BC);
+  Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl, MVT::v4i16, Ext,
+                    DAG.getConstant(0, Dl, MVT::i64));
+  Ext = DAG.getNode(ExtType, Dl, MVT::v4i32, Ext);
+  return DAG.getMergeValues({Ext, Chain}, Dl);
+}
+
 // Generate SUBS and CSEL for integer abs.
 SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
   MVT VT = Op.getSimpleValueType();
@@ -4715,7 +4750,7 @@
   case ISD::LOAD:
     if (useSVEForFixedLengthVectorVT(Op.getValueType()))
       return LowerFixedLengthVectorLoadToSVE(Op, DAG);
-    llvm_unreachable("Unexpected request to lower ISD::LOAD");
+    return LowerLOAD(Op, DAG);
   case ISD::ADD:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
   case ISD::AND:
Index: llvm/test/CodeGen/AArch64/neon-extload.ll
===================================================================
--- llvm/test/CodeGen/AArch64/neon-extload.ll
+++ llvm/test/CodeGen/AArch64/neon-extload.ll
@@ -5,26 +5,17 @@
 define <4 x i32> @fsext(<4 x i8>* %a) {
 ; LE-LABEL: fsext:
 ; LE:       // %bb.0:
-; LE-NEXT:    ldrsb w8, [x0]
-; LE-NEXT:    ldrsb w9, [x0, #1]
-; LE-NEXT:    ldrsb w10, [x0, #2]
-; LE-NEXT:    ldrsb w11, [x0, #3]
-; LE-NEXT:    fmov s0, w8
-; LE-NEXT:    mov v0.s[1], w9
-; LE-NEXT:    mov v0.s[2], w10
-; LE-NEXT:    mov v0.s[3], w11
+; LE-NEXT:    ldr s0, [x0]
+; LE-NEXT:    sshll v0.8h, v0.8b, #0
+; LE-NEXT:    sshll v0.4s, v0.4h, #0
 ; LE-NEXT:    ret
 ;
 ; BE-LABEL: fsext:
 ; BE:       // %bb.0:
-; BE-NEXT:    ldrsb w8, [x0]
-; BE-NEXT:    ldrsb w9, [x0, #1]
-; BE-NEXT:    ldrsb w10, [x0, #2]
-; BE-NEXT:    ldrsb w11, [x0, #3]
-; BE-NEXT:    fmov s0, w8
-; BE-NEXT:    mov v0.s[1], w9
-; BE-NEXT:    mov v0.s[2], w10
-; BE-NEXT:    mov v0.s[3], w11
+; BE-NEXT:    ldr s0, [x0]
+; BE-NEXT:    rev32 v0.8b, v0.8b
+; BE-NEXT:    sshll v0.8h, v0.8b, #0
+; BE-NEXT:    sshll v0.4s, v0.4h, #0
 ; BE-NEXT:    rev64 v0.4s, v0.4s
 ; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; BE-NEXT:    ret
@@ -36,26 +27,17 @@
 define <4 x i32> @fzext(<4 x i8>* %a) {
 ; LE-LABEL: fzext:
 ; LE:       // %bb.0:
-; LE-NEXT:    ldrb w8, [x0]
-; LE-NEXT:    ldrb w9, [x0, #1]
-; LE-NEXT:    ldrb w10, [x0, #2]
-; LE-NEXT:    ldrb w11, [x0, #3]
-; LE-NEXT:    fmov s0, w8
-; LE-NEXT:    mov v0.s[1], w9
-; LE-NEXT:    mov v0.s[2], w10
-; LE-NEXT:    mov v0.s[3], w11
+; LE-NEXT:    ldr s0, [x0]
+; LE-NEXT:    ushll v0.8h, v0.8b, #0
+; LE-NEXT:    ushll v0.4s, v0.4h, #0
 ; LE-NEXT:    ret
 ;
 ; BE-LABEL: fzext:
 ; BE:       // %bb.0:
-; BE-NEXT:    ldrb w8, [x0]
-; BE-NEXT:    ldrb w9, [x0, #1]
-; BE-NEXT:    ldrb w10, [x0, #2]
-; BE-NEXT:    ldrb w11, [x0, #3]
-; BE-NEXT:    fmov s0, w8
-; BE-NEXT:    mov v0.s[1], w9
-; BE-NEXT:    mov v0.s[2], w10
-; BE-NEXT:    mov v0.s[3], w11
+; BE-NEXT:    ldr s0, [x0]
+; BE-NEXT:    rev32 v0.8b, v0.8b
+; BE-NEXT:    ushll v0.8h, v0.8b, #0
+; BE-NEXT:    ushll v0.4s, v0.4h, #0
 ; BE-NEXT:    rev64 v0.4s, v0.4s
 ; BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
 ; BE-NEXT:    ret
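
For reference, a hypothetical source-level sketch (not part of the patch): an
extending <4 x i8> load is the pattern the vectorizer tends to emit for a
small byte-widening loop like the one below, assuming it picks four lanes.
The function name and signature are illustrative only. With this lowering,
such a loop body becomes a single ldr s0 plus two sshll (or ushll)
instructions instead of four scalar byte loads and lane inserts, as the test
diff above shows.

    // Hypothetical example, not from the patch: clang -O2 on AArch64 can
    // vectorize this into "load <4 x i8>" + "sext ... to <4 x i32>", the
    // exact pattern the new LowerLOAD handles.
    void widen(const signed char *src, int *dst) {
      for (int i = 0; i != 4; ++i)
        dst[i] = src[i]; // sign-extending i8 -> i32 per element
    }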