diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13978,7 +13978,11 @@ unsigned SrcEltSize = SrcVT.getScalarSizeInBits(); ElementCount SrcEC = SrcVT.getVectorElementCount(); - SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC); + auto SrcEltTy = MVT::getIntegerVT(SrcEltSize * 2); + // Bail out if widening the source element type does not produce a valid MVT. + if (!SrcEltTy.isValid()) + return SDValue(); + SrcVT = MVT::getVectorVT(SrcEltTy, SrcEC); SDLoc DL(N); Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src); diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -145,3 +145,408 @@ %r = sext <4 x i16> %v0 to <4 x i64> ret <4 x i64> %r } + + +define <32 x i8> @zext_v32i1(<32 x i1> %arg) { +; CHECK-LABEL: zext_v32i1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ldr w8, [sp, #64] +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr w11, [sp, #88] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w12, [sp, #96] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w13, [sp, #112] +; CHECK-NEXT: mov.b v1[2], w10 +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: ldr w9, [sp, #120] +; CHECK-NEXT: mov.b v1[3], w11 +; CHECK-NEXT: ldr w11, [sp] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: mov.b v1[4], w12 +; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: ldr w14, [sp, #128] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: mov.b v1[5], w8 +; CHECK-NEXT: ldr w8, [sp, #16] +; CHECK-NEXT: ldr w10, [sp, #136] +; CHECK-NEXT: mov.b v0[7], w7 +;
CHECK-NEXT: mov.b v1[6], w13 +; CHECK-NEXT: ldr w13, [sp, #24] +; CHECK-NEXT: ldr w15, [sp, #144] +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: ldr w16, [sp, #152] +; CHECK-NEXT: mov.b v0[9], w12 +; CHECK-NEXT: ldr w11, [sp, #160] +; CHECK-NEXT: mov.b v0[10], w8 +; CHECK-NEXT: mov.b v1[8], w14 +; CHECK-NEXT: ldr w12, [sp, #168] +; CHECK-NEXT: mov.b v0[11], w13 +; CHECK-NEXT: ldr w14, [sp, #40] +; CHECK-NEXT: mov.b v1[9], w10 +; CHECK-NEXT: ldr w8, [sp, #176] +; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v1[10], w15 +; CHECK-NEXT: ldr w13, [sp, #184] +; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v1[11], w16 +; CHECK-NEXT: mov.b v1[12], w11 +; CHECK-NEXT: mov.b v0[13], w14 +; CHECK-NEXT: mov.b v1[13], w12 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v1[14], w8 +; CHECK-NEXT: movi.16b v2, #1 +; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v1[15], w13 +; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: and.16b v1, v1, v2 +; CHECK-NEXT: ret + %res = zext <32 x i1> %arg to <32 x i8> + ret <32 x i8> %res +} + +define <32 x i8> @sext_v32i1(<32 x i1> %arg) { +; CHECK-LABEL: sext_v32i1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ldr w8, [sp, #64] +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr w11, [sp, #88] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w12, [sp, #96] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w13, [sp, #112] +; CHECK-NEXT: mov.b v1[2], w10 +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: ldr w9, [sp, #120] +; CHECK-NEXT: mov.b v1[3], w11 +; CHECK-NEXT: ldr w11, [sp] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: mov.b v1[4], w12 +; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: ldr w14, [sp, #128] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: 
mov.b v1[5], w8 +; CHECK-NEXT: ldr w8, [sp, #16] +; CHECK-NEXT: ldr w10, [sp, #136] +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: mov.b v1[6], w13 +; CHECK-NEXT: ldr w13, [sp, #24] +; CHECK-NEXT: ldr w15, [sp, #144] +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: ldr w16, [sp, #152] +; CHECK-NEXT: mov.b v0[9], w12 +; CHECK-NEXT: ldr w11, [sp, #160] +; CHECK-NEXT: mov.b v0[10], w8 +; CHECK-NEXT: mov.b v1[8], w14 +; CHECK-NEXT: ldr w12, [sp, #168] +; CHECK-NEXT: mov.b v0[11], w13 +; CHECK-NEXT: ldr w14, [sp, #40] +; CHECK-NEXT: mov.b v1[9], w10 +; CHECK-NEXT: ldr w8, [sp, #176] +; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v1[10], w15 +; CHECK-NEXT: ldr w13, [sp, #184] +; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v1[11], w16 +; CHECK-NEXT: mov.b v1[12], w11 +; CHECK-NEXT: mov.b v0[13], w14 +; CHECK-NEXT: mov.b v1[13], w12 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v1[14], w8 +; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v1[15], w13 +; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: shl.16b v1, v1, #7 +; CHECK-NEXT: sshr.16b v0, v0, #7 +; CHECK-NEXT: sshr.16b v1, v1, #7 +; CHECK-NEXT: ret + %res = sext <32 x i1> %arg to <32 x i8> + ret <32 x i8> %res +} + +define <64 x i8> @zext_v64i1(<64 x i1> %arg) { +; CHECK-LABEL: zext_v64i1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ldr w11, [sp, #64] +; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #328] +; CHECK-NEXT: ldr w12, [sp, #336] +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: ldr w11, [sp, #200] +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: ldr w10, [sp, #352] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: mov.b v3[1], w9 +; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w13, [sp, #72] +; CHECK-NEXT: mov.b v2[1], w11 +; CHECK-NEXT: 
ldr w11, [sp, #368] +; CHECK-NEXT: mov.b v3[2], w12 +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w12, [sp, #376] +; CHECK-NEXT: mov.b v3[3], w8 +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: mov.b v3[4], w10 +; CHECK-NEXT: ldr w10, [sp] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: ldr w14, [sp, #80] +; CHECK-NEXT: mov.b v3[5], w9 +; CHECK-NEXT: ldr w9, [sp, #8] +; CHECK-NEXT: mov.b v1[1], w13 +; CHECK-NEXT: ldr w13, [sp, #208] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: ldr w15, [sp, #88] +; CHECK-NEXT: mov.b v3[6], w11 +; CHECK-NEXT: ldr w11, [sp, #16] +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: ldr w16, [sp, #96] +; CHECK-NEXT: mov.b v3[7], w12 +; CHECK-NEXT: ldr w12, [sp, #24] +; CHECK-NEXT: mov.b v0[8], w10 +; CHECK-NEXT: ldr w10, [sp, #104] +; CHECK-NEXT: mov.b v1[2], w14 +; CHECK-NEXT: ldr w14, [sp, #216] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: mov.b v2[2], w13 +; CHECK-NEXT: ldr w13, [sp, #384] +; CHECK-NEXT: mov.b v1[3], w15 +; CHECK-NEXT: ldr w15, [sp, #224] +; CHECK-NEXT: mov.b v0[10], w11 +; CHECK-NEXT: ldr w11, [sp, #120] +; CHECK-NEXT: mov.b v1[4], w16 +; CHECK-NEXT: ldr w16, [sp, #232] +; CHECK-NEXT: mov.b v0[11], w12 +; CHECK-NEXT: ldr w12, [sp, #128] +; CHECK-NEXT: mov.b v1[5], w10 +; CHECK-NEXT: ldr w10, [sp, #240] +; CHECK-NEXT: mov.b v2[3], w14 +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: ldr w9, [sp, #248] +; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: mov.b v2[4], w15 +; CHECK-NEXT: mov.b v1[7], w11 +; CHECK-NEXT: ldr w11, [sp, #256] +; CHECK-NEXT: mov.b v3[8], w13 +; CHECK-NEXT: ldr w13, [sp, #32] +; CHECK-NEXT: ldr w14, [sp, #400] +; CHECK-NEXT: mov.b v2[5], w16 +; CHECK-NEXT: mov.b v1[8], w12 +; CHECK-NEXT: ldr w12, [sp, #264] +; CHECK-NEXT: ldr w15, [sp, #408] +; CHECK-NEXT: mov.b v2[6], w10 +; CHECK-NEXT: mov.b v2[7], w9 +; CHECK-NEXT: mov.b v2[8], w11 +; CHECK-NEXT: mov.b v0[12], w13 +; CHECK-NEXT: ldr w13, [sp, #136] +; CHECK-NEXT: mov.b v3[9], w8 +; CHECK-NEXT: ldr w8, [sp, #40] +; 
CHECK-NEXT: mov.b v2[9], w12 +; CHECK-NEXT: ldr w12, [sp, #272] +; CHECK-NEXT: mov.b v3[10], w14 +; CHECK-NEXT: ldr w14, [sp, #48] +; CHECK-NEXT: mov.b v3[11], w15 +; CHECK-NEXT: ldr w15, [sp, #56] +; CHECK-NEXT: ldr w16, [sp, #416] +; CHECK-NEXT: mov.b v1[9], w13 +; CHECK-NEXT: ldr w13, [sp, #280] +; CHECK-NEXT: mov.b v0[13], w8 +; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: mov.b v2[10], w12 +; CHECK-NEXT: ldr w12, [sp, #288] +; CHECK-NEXT: mov.b v0[14], w14 +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: mov.b v0[15], w15 +; CHECK-NEXT: ldr w15, [sp, #160] +; CHECK-NEXT: ldr w10, [sp, #424] +; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: ldr w8, [sp, #296] +; CHECK-NEXT: mov.b v2[11], w13 +; CHECK-NEXT: mov.b v3[12], w16 +; CHECK-NEXT: ldr w16, [sp, #168] +; CHECK-NEXT: ldr w9, [sp, #432] +; CHECK-NEXT: ldr w13, [sp, #304] +; CHECK-NEXT: mov.b v1[11], w14 +; CHECK-NEXT: mov.b v2[12], w12 +; CHECK-NEXT: ldr w12, [sp, #176] +; CHECK-NEXT: ldr w11, [sp, #440] +; CHECK-NEXT: ldr w14, [sp, #312] +; CHECK-NEXT: mov.b v1[12], w15 +; CHECK-NEXT: ldr w15, [sp, #184] +; CHECK-NEXT: mov.b v3[13], w10 +; CHECK-NEXT: mov.b v2[13], w8 +; CHECK-NEXT: mov.b v1[13], w16 +; CHECK-NEXT: mov.b v3[14], w9 +; CHECK-NEXT: mov.b v2[14], w13 +; CHECK-NEXT: mov.b v1[14], w12 +; CHECK-NEXT: movi.16b v4, #1 +; CHECK-NEXT: mov.b v3[15], w11 +; CHECK-NEXT: mov.b v2[15], w14 +; CHECK-NEXT: mov.b v1[15], w15 +; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: and.16b v1, v1, v4 +; CHECK-NEXT: and.16b v2, v2, v4 +; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: ret + %res = zext <64 x i1> %arg to <64 x i8> + ret <64 x i8> %res +} + +define <64 x i8> @sext_v64i1(<64 x i1> %arg) { +; CHECK-LABEL: sext_v64i1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: ldr w11, [sp, #64] +; CHECK-NEXT: ldr w8, [sp, #320] +; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #328] +; CHECK-NEXT: ldr w12, [sp, #336] +; CHECK-NEXT: fmov s3, w11 +; CHECK-NEXT: ldr w11, [sp, #200] +; CHECK-NEXT: fmov s1, w8 +; 
CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: ldr w10, [sp, #352] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w13, [sp, #72] +; CHECK-NEXT: mov.b v2[1], w11 +; CHECK-NEXT: ldr w11, [sp, #368] +; CHECK-NEXT: mov.b v1[2], w12 +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w12, [sp, #376] +; CHECK-NEXT: mov.b v1[3], w8 +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: mov.b v1[4], w10 +; CHECK-NEXT: ldr w10, [sp] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: ldr w14, [sp, #80] +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: ldr w9, [sp, #8] +; CHECK-NEXT: mov.b v3[1], w13 +; CHECK-NEXT: ldr w13, [sp, #208] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: ldr w15, [sp, #88] +; CHECK-NEXT: mov.b v1[6], w11 +; CHECK-NEXT: ldr w11, [sp, #16] +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: ldr w16, [sp, #96] +; CHECK-NEXT: mov.b v1[7], w12 +; CHECK-NEXT: ldr w12, [sp, #24] +; CHECK-NEXT: mov.b v0[8], w10 +; CHECK-NEXT: ldr w10, [sp, #104] +; CHECK-NEXT: mov.b v3[2], w14 +; CHECK-NEXT: ldr w14, [sp, #216] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: mov.b v2[2], w13 +; CHECK-NEXT: ldr w13, [sp, #384] +; CHECK-NEXT: mov.b v3[3], w15 +; CHECK-NEXT: ldr w15, [sp, #224] +; CHECK-NEXT: mov.b v0[10], w11 +; CHECK-NEXT: ldr w11, [sp, #120] +; CHECK-NEXT: mov.b v3[4], w16 +; CHECK-NEXT: ldr w16, [sp, #232] +; CHECK-NEXT: mov.b v0[11], w12 +; CHECK-NEXT: ldr w12, [sp, #128] +; CHECK-NEXT: mov.b v3[5], w10 +; CHECK-NEXT: ldr w10, [sp, #240] +; CHECK-NEXT: mov.b v2[3], w14 +; CHECK-NEXT: mov.b v3[6], w9 +; CHECK-NEXT: ldr w9, [sp, #248] +; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: mov.b v2[4], w15 +; CHECK-NEXT: mov.b v3[7], w11 +; CHECK-NEXT: ldr w11, [sp, #256] +; CHECK-NEXT: mov.b v1[8], w13 +; CHECK-NEXT: ldr w13, [sp, #32] +; CHECK-NEXT: ldr w14, [sp, #400] +; CHECK-NEXT: mov.b v2[5], w16 +; 
CHECK-NEXT: mov.b v3[8], w12 +; CHECK-NEXT: ldr w12, [sp, #264] +; CHECK-NEXT: ldr w15, [sp, #408] +; CHECK-NEXT: mov.b v2[6], w10 +; CHECK-NEXT: mov.b v2[7], w9 +; CHECK-NEXT: mov.b v2[8], w11 +; CHECK-NEXT: mov.b v0[12], w13 +; CHECK-NEXT: ldr w13, [sp, #136] +; CHECK-NEXT: mov.b v1[9], w8 +; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: mov.b v2[9], w12 +; CHECK-NEXT: ldr w12, [sp, #272] +; CHECK-NEXT: mov.b v1[10], w14 +; CHECK-NEXT: ldr w14, [sp, #48] +; CHECK-NEXT: mov.b v1[11], w15 +; CHECK-NEXT: ldr w15, [sp, #56] +; CHECK-NEXT: ldr w16, [sp, #416] +; CHECK-NEXT: mov.b v3[9], w13 +; CHECK-NEXT: ldr w13, [sp, #280] +; CHECK-NEXT: mov.b v0[13], w8 +; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: mov.b v2[10], w12 +; CHECK-NEXT: ldr w12, [sp, #288] +; CHECK-NEXT: mov.b v0[14], w14 +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: mov.b v0[15], w15 +; CHECK-NEXT: ldr w15, [sp, #160] +; CHECK-NEXT: ldr w10, [sp, #424] +; CHECK-NEXT: mov.b v3[10], w8 +; CHECK-NEXT: ldr w8, [sp, #296] +; CHECK-NEXT: mov.b v2[11], w13 +; CHECK-NEXT: mov.b v1[12], w16 +; CHECK-NEXT: ldr w16, [sp, #168] +; CHECK-NEXT: ldr w9, [sp, #432] +; CHECK-NEXT: ldr w13, [sp, #304] +; CHECK-NEXT: mov.b v3[11], w14 +; CHECK-NEXT: mov.b v2[12], w12 +; CHECK-NEXT: ldr w12, [sp, #176] +; CHECK-NEXT: ldr w11, [sp, #440] +; CHECK-NEXT: ldr w14, [sp, #312] +; CHECK-NEXT: mov.b v3[12], w15 +; CHECK-NEXT: ldr w15, [sp, #184] +; CHECK-NEXT: mov.b v1[13], w10 +; CHECK-NEXT: mov.b v2[13], w8 +; CHECK-NEXT: mov.b v3[13], w16 +; CHECK-NEXT: mov.b v1[14], w9 +; CHECK-NEXT: mov.b v2[14], w13 +; CHECK-NEXT: mov.b v3[14], w12 +; CHECK-NEXT: mov.b v1[15], w11 +; CHECK-NEXT: mov.b v2[15], w14 +; CHECK-NEXT: mov.b v3[15], w15 +; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: shl.16b v3, v3, #7 +; CHECK-NEXT: shl.16b v2, v2, #7 +; CHECK-NEXT: shl.16b v4, v1, #7 +; CHECK-NEXT: sshr.16b v0, v0, #7 +; CHECK-NEXT: sshr.16b v1, v3, #7 +; CHECK-NEXT: sshr.16b v2, v2, #7 +; CHECK-NEXT: sshr.16b v3, v4, #7 +; CHECK-NEXT: ret 
+ %res = sext <64 x i1> %arg to <64 x i8> + ret <64 x i8> %res +}