diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -13939,78 +13939,7 @@
     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
                        NewABD);
   }
-
-  // This is effectively a custom type legalization for AArch64.
-  //
-  // Type legalization will split an extend of a small, legal, type to a larger
-  // illegal type by first splitting the destination type, often creating
-  // illegal source types, which then get legalized in isel-confusing ways,
-  // leading to really terrible codegen. E.g.,
-  //   %result = v8i32 sext v8i8 %value
-  // becomes
-  //   %losrc = extract_subreg %value, ...
-  //   %hisrc = extract_subreg %value, ...
-  //   %lo = v4i32 sext v4i8 %losrc
-  //   %hi = v4i32 sext v4i8 %hisrc
-  // Things go rapidly downhill from there.
-  //
-  // For AArch64, the [sz]ext vector instructions can only go up one element
-  // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
-  // take two instructions.
-  //
-  // This implies that the most efficient way to do the extend from v8i8
-  // to two v4i32 values is to first extend the v8i8 to v8i16, then do
-  // the normal splitting to happen for the v8i16->v8i32.
-
-  // This is pre-legalization to catch some cases where the default
-  // type legalization will create ill-tempered code.
-  if (!DCI.isBeforeLegalizeOps())
-    return SDValue();
-
-  // We're only interested in cleaning things up for non-legal vector types
-  // here. If both the source and destination are legal, things will just
-  // work naturally without any fiddling.
-  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  EVT ResVT = N->getValueType(0);
-  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
-    return SDValue();
-  // If the vector type isn't a simple VT, it's beyond the scope of what
-  // we're worried about here. Let legalization do its thing and hope for
-  // the best.
-  SDValue Src = N->getOperand(0);
-  EVT SrcVT = Src->getValueType(0);
-  if (!ResVT.isSimple() || !SrcVT.isSimple())
-    return SDValue();
-
-  // If the source VT is a 64-bit fixed or scalable vector, we can play games
-  // and get the better results we want.
-  if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
-    return SDValue();
-
-  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
-  ElementCount SrcEC = SrcVT.getVectorElementCount();
-  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
-  SDLoc DL(N);
-  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
-
-  // Now split the rest of the operation into two halves, each with a 64
-  // bit source.
-  EVT LoVT, HiVT;
-  SDValue Lo, Hi;
-  LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
-
-  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
-                               LoVT.getVectorElementCount());
-  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getConstant(0, DL, MVT::i64));
-  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
-                   DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
-
-  // Now combine the parts back together so we still have a single result
-  // like the combiner expects.
-  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+  return SDValue();
 }
 
 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -202,3 +202,67 @@
   %r = sext <8 x i8> %v0 to <8 x i64>
   ret <8 x i64> %r
 }
+
+; Extends of vectors of i1.
+
+define <32 x i8> @zext_v32i1(<32 x i1> %arg) {
+; CHECK-LABEL: zext_v32i1:
+; CHECK: and.16b v0, v0, v2
+; CHECK-NEXT: and.16b v1, v1, v2
+; CHECK-NEXT: ret
+  %res = zext <32 x i1> %arg to <32 x i8>
+  ret <32 x i8> %res
+}
+
+define <32 x i8> @sext_v32i1(<32 x i1> %arg) {
+; CHECK-LABEL: sext_v32i1:
+; CHECK: shl.16b v0, v0, #7
+; CHECK-NEXT: shl.16b v1, v1, #7
+; CHECK-NEXT: sshr.16b v0, v0, #7
+; CHECK-NEXT: sshr.16b v1, v1, #7
+; CHECK-NEXT: ret
+;
+  %res = sext <32 x i1> %arg to <32 x i8>
+  ret <32 x i8> %res
+}
+
+define <64 x i8> @zext_v64i1(<64 x i1> %arg) {
+; CHECK-LABEL: zext_v64i1:
+; CHECK: and.16b v0, v0, [[V4:v.+]]
+; CHECK-NEXT: and.16b v1, v1, [[V4]]
+; CHECK-NEXT: and.16b v2, v2, [[V4]]
+; CHECK-NEXT: and.16b v3, v3, [[V4]]
+; CHECK-NEXT: ret
+;
+  %res = zext <64 x i1> %arg to <64 x i8>
+  ret <64 x i8> %res
+}
+
+define <64 x i8> @sext_v64i1(<64 x i1> %arg) {
+; CHECK-LABEL: sext_v64i1:
+; CHECK: shl.16b v0, v0, #7
+; CHECK-NEXT: shl.16b v3, v3, #7
+; CHECK-NEXT: shl.16b v2, v2, #7
+; CHECK-NEXT: shl.16b [[V4:v.+]], v1, #7
+; CHECK-NEXT: sshr.16b v0, v0, #7
+; CHECK-NEXT: sshr.16b v1, v3, #7
+; CHECK-NEXT: sshr.16b v2, v2, #7
+; CHECK-NEXT: sshr.16b v3, [[V4]], #7
+; CHECK-NEXT: ret
+;
+  %res = sext <64 x i1> %arg to <64 x i8>
+  ret <64 x i8> %res
+}
+
+define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
+; CHECK-LABEL: sext_v1x64:
+; CHECK-NEXT: .cfi_startproc
+; CHECK-NEXT: fmov x8, d0
+; CHECK-NEXT: asr x1, x8, #63
+; CHECK-NEXT: mov.d v0[1], x1
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+;
+  %res = sext <1 x i64> %arg to <1 x i128>
+  ret <1 x i128> %res
+}
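Illustration (not part of the patch): the comment removed above describes widening one element size first, then splitting. Written directly as IR, with a hypothetical function name @sext_two_step, a v8i8 -> v8i32 sign extension under that strategy would look like this sketch:

; Illustration only, not a test from this patch. The shufflevectors model
; the EXTRACT_SUBVECTOR/CONCAT_VECTORS steps the removed combine built as
; SelectionDAG nodes.
define <8 x i32> @sext_two_step(<8 x i8> %v) {
  ; Step 1: widen one element size (v8i8 -> v8i16); a single sshll on AArch64.
  %w = sext <8 x i8> %v to <8 x i16>
  ; Step 2: split the now-legal v8i16 into halves, extend each half one more
  ; element size (sshll / sshll2), then concatenate the two results.
  %lo = shufflevector <8 x i16> %w, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %hi = shufflevector <8 x i16> %w, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %lo32 = sext <4 x i16> %lo to <4 x i32>
  %hi32 = sext <4 x i16> %hi to <4 x i32>
  %res = shufflevector <4 x i32> %lo32, <4 x i32> %hi32, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %res
}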