diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -375,6 +375,7 @@
   LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
   LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
   LegalizeResult lowerFunnelShift(MachineInstr &MI);
+  LegalizeResult lowerEXT(MachineInstr &MI);
   LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
   LegalizeResult lowerRotate(MachineInstr &MI);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3601,6 +3601,10 @@
     return lowerMemCpyFamily(MI);
   case G_MEMCPY_INLINE:
     return lowerMemcpyInline(MI);
+  case G_ZEXT:
+  case G_SEXT:
+  case G_ANYEXT:
+    return lowerEXT(MI);
   GISEL_VECREDUCE_CASES_NONSEQ
     return lowerVectorReduction(MI);
   }
@@ -5955,6 +5959,67 @@
   return Result;
 }
 
+/// Lower a vector G_ZEXT, G_SEXT or G_ANYEXT whose destination element is more
+/// than twice as wide as its source element.
+///
+/// The extend is rewritten as one extend to the doubled element width, an
+/// unmerge of that intermediate value into two halves, one extend per half to
+/// the destination element type, and a merge into the original destination:
+///   ext x -> merge(ext(half0(ext x)), ext(half1(ext x)))
+/// Every extend that is created reuses the opcode of \p MI.
+///
+/// \returns Legalized if \p MI was rewritten and erased; UnableToLegalize if
+/// the types are not vectors, one of the checked sizes is not a power of two,
+/// or the destination element is at most twice as wide as the source element.
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
+  auto [Dst, Src] = MI.getFirst2Regs();
+  LLT DstTy = MRI.getType(Dst);
+  LLT SrcTy = MRI.getType(Src);
+
+  // The split below halves element counts, and LLT::getElementCount() is only
+  // valid on vector types. Reject scalars before any instruction is built.
+  if (!DstTy.isVector() || !SrcTy.isVector())
+    return UnableToLegalize;
+
+  uint32_t DstTySize = DstTy.getSizeInBits();
+  uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
+  uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
+
+  if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
+      !isPowerOf2_32(SrcTyScalarSize))
+    return UnableToLegalize;
+
+  // Only a step of more than one doubling needs splitting; anything smaller is
+  // not handled by this lowering.
+  if (SrcTyScalarSize * 2 >= DstTyScalarSize)
+    return UnableToLegalize;
+
+  const unsigned ExtOpc = MI.getOpcode();
+
+  // First step: extend every element to twice the source element width.
+  LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
+  auto MidExt = MIRBuilder.buildInstr(ExtOpc, {MidTy}, {Src});
+
+  // Split the intermediate vector into two halves.
+  LLT HalfMidTy = MidTy.changeElementCount(
+      MidTy.getElementCount().divideCoefficientBy(2));
+  auto Halves = MIRBuilder.buildUnmerge(HalfMidTy, MidExt);
+
+  // Extend each half to the destination element type.
+  LLT HalfDstTy = DstTy.changeElementCount(
+      DstTy.getElementCount().divideCoefficientBy(2));
+  auto ExtHalf0 =
+      MIRBuilder.buildInstr(ExtOpc, {HalfDstTy}, {Halves.getReg(0)});
+  auto ExtHalf1 =
+      MIRBuilder.buildInstr(ExtOpc, {HalfDstTy}, {Halves.getReg(1)});
+
+  // Recombine the halves into the original destination register.
+  MIRBuilder.buildMergeLikeInstr(Dst, {ExtHalf0, ExtHalf1});
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
   auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -491,14 +491,13 @@
   auto ExtLegalFunc = [=](const LegalityQuery &Query) {
     unsigned DstSize = Query.Types[0].getSizeInBits();
 
-    if (DstSize == 128 && !Query.Types[0].isVector())
-      return false; // Extending to a scalar s128 needs narrowing.
-
-    // Make sure that we have something that will fit in a register, and
-    // make sure it's a power of 2.
-    if (DstSize < 8 || DstSize > 128 || !isPowerOf2_32(DstSize))
+    // Handle legal vectors using legalFor
+    if (Query.Types[0].isVector())
       return false;
 
+    if (DstSize < 8 || DstSize >= 128 || !isPowerOf2_32(DstSize))
+      return false; // Extending to a scalar s128 needs narrowing.
+
     const LLT &SrcTy = Query.Types[1];
 
     // Make sure we fit in a register otherwise. Don't bother checking that
@@ -512,7 +511,20 @@
   };
   getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
       .legalIf(ExtLegalFunc)
-      .clampScalar(0, s64, s64); // Just for s128, others are handled above.
+ .legalFor({{v2s64, v2s32}, {v4s32, v4s16}, {v8s16, v8s8}}) + .clampScalar(0, s64, s64) // Just for s128, others are handled above. + .moreElementsToNextPow2(1) + .clampMaxNumElements(1, s8, 8) + .clampMaxNumElements(1, s16, 4) + .clampMaxNumElements(1, s32, 2) + // Tries to convert a large EXTEND into two smaller EXTENDs + .lowerIf([=](const LegalityQuery &Query) { + return (Query.Types[0].getScalarSizeInBits() > + Query.Types[1].getScalarSizeInBits() * 2) && + Query.Types[0].isVector() && + (Query.Types[1].getScalarSizeInBits() == 8 || + Query.Types[1].getScalarSizeInBits() == 16); + }); getActionDefinitionsBuilder(G_TRUNC) .minScalarOrEltIf( diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir @@ -243,15 +243,15 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2 - ; CHECK-NEXT: [[ANYEXT0:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8) ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8) - ; CHECK-NEXT: [[IMPLICIT_DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT0]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[IMPLICIT_DEF]](s16) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16) ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BUILD_VECTOR]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[VAL0:%[0-9]+]]:_(s16), [[VAL1:%[0-9]+]]:_(s16), [[VAL2:%[0-9]+]]:_(s16), [[VAL3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>) - ; CHECK-NEXT: 
[[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[VAL0]](s16) - ; CHECK-NEXT: $b0 = COPY [[TRUNC3]](s8) + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[AND]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) + ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8) ; CHECK-NEXT: RET_ReallyLR implicit $b0 %1:_(s8) = COPY $b0 %2:_(s8) = COPY $b1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir @@ -129,8 +129,8 @@ %0:_(s16) = COPY $h0 %1:_(s16) = COPY $h1 %2:_(<2 x s16>) = G_BUILD_VECTOR %0(s16), %1(s16) - %ext:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>) - $d0 = COPY %ext(<2 x s32>) + %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s16>) + $d0 = COPY %3(<2 x s32>) RET_ReallyLR ... @@ -141,14 +141,14 @@ ; CHECK-LABEL: name: widen_v2s8 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>) + ; CHECK-NEXT: %3:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) + ; CHECK-NEXT: $d0 = COPY %3(<2 x s32>) ; CHECK-NEXT: RET_ReallyLR %0:_(s8) = G_IMPLICIT_DEF %1:_(s8) = G_IMPLICIT_DEF %2:_(<2 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8) - %ext:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>) - $d0 = COPY %ext(<2 x s32>) + %3:_(<2 x s32>) = G_ANYEXT %2(<2 x s8>) + $d0 = COPY %3(<2 x s32>) RET_ReallyLR ... @@ -169,7 +169,7 @@ %2:_(s8) = G_IMPLICIT_DEF %3:_(s8) = G_IMPLICIT_DEF %4:_(<4 x s8>) = G_BUILD_VECTOR %0(s8), %1(s8), %2(s8), %3(s8) - %ext:_(<4 x s16>) = G_ANYEXT %4(<4 x s8>) - $d0 = COPY %ext(<4 x s16>) + %5:_(<4 x s16>) = G_ANYEXT %4(<4 x s8>) + $d0 = COPY %5(<4 x s16>) RET_ReallyLR ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-itofp.mir @@ -274,13 +274,12 @@ ; CHECK-LABEL: name: test_uitofp_v2s64_v2i1 ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[ANYEXT]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BUILD_VECTOR1]], [[BUILD_VECTOR]] ; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<2 x s64>) = G_UITOFP [[AND]](<2 x s64>) ; CHECK-NEXT: $q0 = COPY [[UITOFP]](<2 x s64>) %0:_(<2 x s1>) = G_IMPLICIT_DEF @@ -296,11 +295,10 @@ ; CHECK-LABEL: name: test_sitofp_v2s64_v2i1 ; CHECK: liveins: $q0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32) - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32) - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BUILD_VECTOR]](<2 x s32>) - ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s64>) = G_SEXT_INREG [[ANYEXT]], 1 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY [[DEF]](s64) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[DEF]](s64), [[COPY]](s64) + ; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(<2 x s64>) = G_SEXT_INREG [[BUILD_VECTOR]], 1 ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:_(<2 x s64>) = G_SITOFP [[SEXT_INREG]](<2 x s64>) ; CHECK-NEXT: $q0 = COPY [[SITOFP]](<2 x s64>) %0:_(<2 x s1>) = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll @@ -69,14 +69,40 @@ declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias nocapture readonly %arg2) { -; CHECK-LABEL: oversized_ADDV_256: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uabdl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: uaddlv s0, v0.8h -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; SDAG-LABEL: oversized_ADDV_256: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: ldr d0, [x0] +; SDAG-NEXT: ldr d1, [x1] +; SDAG-NEXT: uabdl v0.8h, v0.8b, v1.8b +; SDAG-NEXT: uaddlv s0, v0.8h +; SDAG-NEXT: fmov w0, s0 +; SDAG-NEXT: ret +; +; GISEL-LABEL: oversized_ADDV_256: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldr d1, [x0] +; GISEL-NEXT: ldr d2, [x1] +; GISEL-NEXT: movi v0.2d, #0000000000000000 +; GISEL-NEXT: ushll v1.8h, v1.8b, #0 +; GISEL-NEXT: ushll v2.8h, v2.8b, #0 +; GISEL-NEXT: mov d3, v1.d[1] +; GISEL-NEXT: mov d4, v2.d[1] +; GISEL-NEXT: usubl v1.4s, v1.4h, v2.4h +; GISEL-NEXT: usubl v2.4s, v3.4h, v4.4h +; GISEL-NEXT: cmgt v3.4s, v0.4s, v1.4s +; GISEL-NEXT: neg v4.4s, v1.4s +; GISEL-NEXT: cmgt v0.4s, v0.4s, v2.4s +; GISEL-NEXT: shl v3.4s, v3.4s, #31 +; GISEL-NEXT: shl v0.4s, v0.4s, #31 +; GISEL-NEXT: neg v5.4s, v2.4s +; GISEL-NEXT: sshr v3.4s, v3.4s, #31 +; GISEL-NEXT: sshr v0.4s, v0.4s, #31 +; GISEL-NEXT: bit v1.16b, v4.16b, v3.16b +; GISEL-NEXT: bsl 
v0.16b, v5.16b, v2.16b +; GISEL-NEXT: add v0.4s, v1.4s, v0.4s +; GISEL-NEXT: addv s0, v0.4s +; GISEL-NEXT: fmov w0, s0 +; GISEL-NEXT: ret entry: %0 = load <8 x i8>, ptr %arg1, align 1 %1 = zext <8 x i8> %0 to <8 x i32> @@ -93,16 +119,16 @@ declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) define i32 @oversized_ADDV_512(ptr %arr) { -; SDAG-LABEL: oversized_ADDV_512: -; SDAG: // %bb.0: -; SDAG-NEXT: ldp q0, q1, [x0, #32] -; SDAG-NEXT: ldp q3, q2, [x0] -; SDAG-NEXT: add v0.4s, v3.4s, v0.4s -; SDAG-NEXT: add v1.4s, v2.4s, v1.4s -; SDAG-NEXT: add v0.4s, v0.4s, v1.4s -; SDAG-NEXT: addv s0, v0.4s -; SDAG-NEXT: fmov w0, s0 -; SDAG-NEXT: ret +; SDAG-LABEL: oversized_ADDV_512: +; SDAG: // %bb.0: +; SDAG-NEXT: ldp q0, q1, [x0, #32] +; SDAG-NEXT: ldp q3, q2, [x0] +; SDAG-NEXT: add v0.4s, v3.4s, v0.4s +; SDAG-NEXT: add v1.4s, v2.4s, v1.4s +; SDAG-NEXT: add v0.4s, v0.4s, v1.4s +; SDAG-NEXT: addv s0, v0.4s +; SDAG-NEXT: fmov w0, s0 +; SDAG-NEXT: ret ; ; GISEL-LABEL: oversized_ADDV_512: ; GISEL: // %bb.0: @@ -148,19 +174,19 @@ } define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) { -; SDAG-LABEL: addv_combine_i32: -; SDAG: // %bb.0: // %entry -; SDAG-NEXT: add v0.4s, v0.4s, v1.4s -; SDAG-NEXT: addv s0, v0.4s -; SDAG-NEXT: fmov w0, s0 -; SDAG-NEXT: ret +; SDAG-LABEL: addv_combine_i32: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: add v0.4s, v0.4s, v1.4s +; SDAG-NEXT: addv s0, v0.4s +; SDAG-NEXT: fmov w0, s0 +; SDAG-NEXT: ret ; ; GISEL-LABEL: addv_combine_i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: addv s0, v0.4s -; GISEL-NEXT: addv s1, v1.4s -; GISEL-NEXT: fmov w8, s0 -; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: addv s0, v0.4s +; GISEL-NEXT: addv s1, v1.4s +; GISEL-NEXT: fmov w8, s0 +; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: add w0, w8, w9 ; GISEL-NEXT: ret entry: @@ -171,19 +197,19 @@ } define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) { -; SDAG-LABEL: addv_combine_i64: -; SDAG: // %bb.0: // %entry -; SDAG-NEXT: add v0.2d, v0.2d, v1.2d -; SDAG-NEXT: addp d0, 
v0.2d -; SDAG-NEXT: fmov x0, d0 -; SDAG-NEXT: ret +; SDAG-LABEL: addv_combine_i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: add v0.2d, v0.2d, v1.2d +; SDAG-NEXT: addp d0, v0.2d +; SDAG-NEXT: fmov x0, d0 +; SDAG-NEXT: ret ; ; GISEL-LABEL: addv_combine_i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: addp d0, v0.2d -; GISEL-NEXT: addp d1, v1.2d -; GISEL-NEXT: fmov x8, d0 -; GISEL-NEXT: fmov x9, d1 +; GISEL-NEXT: addp d0, v0.2d +; GISEL-NEXT: addp d1, v1.2d +; GISEL-NEXT: fmov x8, d0 +; GISEL-NEXT: fmov x9, d1 ; GISEL-NEXT: add x0, x8, x9 ; GISEL-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -5,21 +5,7 @@ ; Test efficient codegen of vector extends up from legal type to 128 bit ; and 256 bit vector types. -; CHECK-GI: warning: Instruction selection used fallback path for func3 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for func4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for afunc3 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for afunc4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bfunc1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bfunc2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zfunc1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zfunc2 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bfunc3 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for cfunc4 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v4i8_to_v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v4i8_to_v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v8i8_to_v8i64 -; CHECK-GI-NEXT: 
warning: Instruction selection used fallback path for sext_v8i8_to_v8i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v32i1 +; CHECK-GI: warning: Instruction selection used fallback path for zext_v32i1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v32i1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for zext_v64i1 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v64i1 @@ -47,21 +33,35 @@ } define <16 x i16> @func3(<16 x i8> %v0) nounwind { -; CHECK-LABEL: func3: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll2.8h v1, v0, #0 -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll2.8h v1, v0, #0 +; CHECK-SD-NEXT: ushll.8h v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.8h v0, v0, #0 +; CHECK-GI-NEXT: ushll.8h v1, v1, #0 +; CHECK-GI-NEXT: ret %r = zext <16 x i8> %v0 to <16 x i16> ret <16 x i16> %r } define <16 x i16> @func4(<16 x i8> %v0) nounwind { -; CHECK-LABEL: func4: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll2.8h v1, v0, #0 -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: func4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll2.8h v1, v0, #0 +; CHECK-SD-NEXT: sshll.8h v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: func4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: sshll.8h v0, v0, #0 +; CHECK-GI-NEXT: sshll.8h v1, v1, #0 +; CHECK-GI-NEXT: ret %r = sext <16 x i8> %v0 to <16 x i16> ret <16 x i16> %r } @@ -89,43 +89,73 @@ } define <8 x i32> @afunc3(<8 x i16> %v0) nounwind { -; CHECK-LABEL: afunc3: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll2.4s v1, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: afunc3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll2.4s v1, v0, #0 +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 +; CHECK-SD-NEXT: 
ret +; +; CHECK-GI-LABEL: afunc3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.4s v0, v0, #0 +; CHECK-GI-NEXT: ushll.4s v1, v1, #0 +; CHECK-GI-NEXT: ret %r = zext <8 x i16> %v0 to <8 x i32> ret <8 x i32> %r } define <8 x i32> @afunc4(<8 x i16> %v0) nounwind { -; CHECK-LABEL: afunc4: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll2.4s v1, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: afunc4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll2.4s v1, v0, #0 +; CHECK-SD-NEXT: sshll.4s v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: afunc4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: sshll.4s v0, v0, #0 +; CHECK-GI-NEXT: sshll.4s v1, v1, #0 +; CHECK-GI-NEXT: ret %r = sext <8 x i16> %v0 to <8 x i32> ret <8 x i32> %r } define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind { -; CHECK-LABEL: bfunc1: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll2.4s v1, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bfunc1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll.8h v0, v0, #0 +; CHECK-SD-NEXT: ushll2.4s v1, v0, #0 +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bfunc1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll.8h v0, v0, #0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.4s v0, v0, #0 +; CHECK-GI-NEXT: ushll.4s v1, v1, #0 +; CHECK-GI-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i32> ret <8 x i32> %r } define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind { -; CHECK-LABEL: bfunc2: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v1, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bfunc2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll.8h v0, v0, #0 +; CHECK-SD-NEXT: sshll2.4s v1, v0, #0 +; CHECK-SD-NEXT: sshll.4s v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bfunc2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll.8h v0, v0, #0 +; 
CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: sshll.4s v0, v0, #0 +; CHECK-GI-NEXT: sshll.4s v1, v1, #0 +; CHECK-GI-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i32> ret <8 x i32> %r } @@ -135,100 +165,182 @@ ;----- define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind { -; CHECK-LABEL: zfunc1: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: zfunc1: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll2.2d v1, v0, #0 +; CHECK-SD-NEXT: ushll.2d v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zfunc1: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.2d v0, v0, #0 +; CHECK-GI-NEXT: ushll.2d v1, v1, #0 +; CHECK-GI-NEXT: ret %r = zext <4 x i32> %v0 to <4 x i64> ret <4 x i64> %r } define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind { -; CHECK-LABEL: zfunc2: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll2.2d v1, v0, #0 -; CHECK-NEXT: sshll.2d v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: zfunc2: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll2.2d v1, v0, #0 +; CHECK-SD-NEXT: sshll.2d v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zfunc2: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: sshll.2d v0, v0, #0 +; CHECK-GI-NEXT: sshll.2d v1, v1, #0 +; CHECK-GI-NEXT: ret %r = sext <4 x i32> %v0 to <4 x i64> ret <4 x i64> %r } define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind { -; CHECK-LABEL: bfunc3: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: bfunc3: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 +; CHECK-SD-NEXT: ushll2.2d v1, v0, #0 +; CHECK-SD-NEXT: ushll.2d v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: bfunc3: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll.4s v0, v0, #0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.2d v0, v0, #0 +; CHECK-GI-NEXT: ushll.2d v1, v1, #0 +; CHECK-GI-NEXT: ret %r = zext <4 
x i16> %v0 to <4 x i64> ret <4 x i64> %r } define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind { -; CHECK-LABEL: cfunc4: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll2.2d v1, v0, #0 -; CHECK-NEXT: sshll.2d v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: cfunc4: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll.4s v0, v0, #0 +; CHECK-SD-NEXT: sshll2.2d v1, v0, #0 +; CHECK-SD-NEXT: sshll.2d v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cfunc4: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll.4s v0, v0, #0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: sshll.2d v0, v0, #0 +; CHECK-GI-NEXT: sshll.2d v1, v1, #0 +; CHECK-GI-NEXT: ret %r = sext <4 x i16> %v0 to <4 x i64> ret <4 x i64> %r } define <4 x i64> @zext_v4i8_to_v4i64(<4 x i8> %v0) nounwind { -; CHECK-LABEL: zext_v4i8_to_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: bic.4h v0, #255, lsl #8 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: zext_v4i8_to_v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: bic.4h v0, #255, lsl #8 +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 +; CHECK-SD-NEXT: ushll2.2d v1, v0, #0 +; CHECK-SD-NEXT: ushll.2d v0, v0, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i8_to_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll.4s v0, v0, #0 +; CHECK-GI-NEXT: adrp x8, .LCPI14_0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-GI-NEXT: ushll.2d v0, v0, #0 +; CHECK-GI-NEXT: ushll.2d v1, v1, #0 +; CHECK-GI-NEXT: and.16b v0, v0, v2 +; CHECK-GI-NEXT: and.16b v1, v1, v2 +; CHECK-GI-NEXT: ret %r = zext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r } define <4 x i64> @sext_v4i8_to_v4i64(<4 x i8> %v0) nounwind { -; CHECK-LABEL: sext_v4i8_to_v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll.2d v1, v0, #0 -; CHECK-NEXT: ushll2.2d v0, v0, #0 -; CHECK-NEXT: shl.2d v2, v1, #56 -; CHECK-NEXT: shl.2d v0, v0, #56 -; 
CHECK-NEXT: sshr.2d v1, v0, #56 -; CHECK-NEXT: sshr.2d v0, v2, #56 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sext_v4i8_to_v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 +; CHECK-SD-NEXT: ushll.2d v1, v0, #0 +; CHECK-SD-NEXT: ushll2.2d v0, v0, #0 +; CHECK-SD-NEXT: shl.2d v2, v1, #56 +; CHECK-SD-NEXT: shl.2d v0, v0, #56 +; CHECK-SD-NEXT: sshr.2d v1, v0, #56 +; CHECK-SD-NEXT: sshr.2d v0, v2, #56 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v4i8_to_v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll.4s v0, v0, #0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.2d v0, v0, #0 +; CHECK-GI-NEXT: shl.2d v0, v0, #56 +; CHECK-GI-NEXT: ushll.2d v1, v1, #0 +; CHECK-GI-NEXT: sshr.2d v0, v0, #56 +; CHECK-GI-NEXT: shl.2d v1, v1, #56 +; CHECK-GI-NEXT: sshr.2d v1, v1, #56 +; CHECK-GI-NEXT: ret %r = sext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r } define <8 x i64> @zext_v8i8_to_v8i64(<8 x i8> %v0) nounwind { -; CHECK-LABEL: zext_v8i8_to_v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll2.4s v2, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v3, v2, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ushll.2d v2, v2, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: zext_v8i8_to_v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ushll.8h v0, v0, #0 +; CHECK-SD-NEXT: ushll2.4s v2, v0, #0 +; CHECK-SD-NEXT: ushll.4s v0, v0, #0 +; CHECK-SD-NEXT: ushll2.2d v3, v2, #0 +; CHECK-SD-NEXT: ushll2.2d v1, v0, #0 +; CHECK-SD-NEXT: ushll.2d v0, v0, #0 +; CHECK-SD-NEXT: ushll.2d v2, v2, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v8i8_to_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ushll.8h v0, v0, #0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: ushll.4s v0, v0, #0 +; CHECK-GI-NEXT: mov d2, v0[1] +; CHECK-GI-NEXT: ushll.4s v3, v1, #0 +; CHECK-GI-NEXT: ushll.2d v0, v0, #0 +; CHECK-GI-NEXT: mov d4, v3[1] +; CHECK-GI-NEXT: ushll.2d v1, v2, #0 +; CHECK-GI-NEXT: ushll.2d v2, v3, #0 
+; CHECK-GI-NEXT: ushll.2d v3, v4, #0 +; CHECK-GI-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r } define <8 x i64> @sext_v8i8_to_v8i64(<8 x i8> %v0) nounwind { -; CHECK-LABEL: sext_v8i8_to_v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v2, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll2.2d v3, v2, #0 -; CHECK-NEXT: sshll2.2d v1, v0, #0 -; CHECK-NEXT: sshll.2d v0, v0, #0 -; CHECK-NEXT: sshll.2d v2, v2, #0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sext_v8i8_to_v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sshll.8h v0, v0, #0 +; CHECK-SD-NEXT: sshll2.4s v2, v0, #0 +; CHECK-SD-NEXT: sshll.4s v0, v0, #0 +; CHECK-SD-NEXT: sshll2.2d v3, v2, #0 +; CHECK-SD-NEXT: sshll2.2d v1, v0, #0 +; CHECK-SD-NEXT: sshll.2d v0, v0, #0 +; CHECK-SD-NEXT: sshll.2d v2, v2, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i8_to_v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sshll.8h v0, v0, #0 +; CHECK-GI-NEXT: mov d1, v0[1] +; CHECK-GI-NEXT: sshll.4s v0, v0, #0 +; CHECK-GI-NEXT: mov d2, v0[1] +; CHECK-GI-NEXT: sshll.4s v3, v1, #0 +; CHECK-GI-NEXT: sshll.2d v0, v0, #0 +; CHECK-GI-NEXT: mov d4, v3[1] +; CHECK-GI-NEXT: sshll.2d v1, v2, #0 +; CHECK-GI-NEXT: sshll.2d v2, v3, #0 +; CHECK-GI-NEXT: sshll.2d v3, v4, #0 +; CHECK-GI-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r } diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -3,10 +3,6 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; CHECK-GI: warning: Instruction selection used fallback path for uabd16b_rdx -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd16b_rdx_i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd16b_rdx_i32 -; CHECK-GI-NEXT: warning: 
Instruction selection used fallback path for uabd8h_rdx -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd8h_rdx ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uabd4s_rdx ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sabd4s_rdx ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for abs_8b @@ -281,13 +277,58 @@ } define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: uabd16b_rdx_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uabdl.8h v2, v0, v1 -; CHECK-NEXT: uabal2.8h v2, v0, v1 -; CHECK-NEXT: uaddlv.8h s0, v2 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uabd16b_rdx_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uabdl.8h v2, v0, v1 +; CHECK-SD-NEXT: uabal2.8h v2, v0, v1 +; CHECK-SD-NEXT: uaddlv.8h s0, v2 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uabd16b_rdx_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0[1] +; CHECK-GI-NEXT: ushll.8h v4, v1, #0 +; CHECK-GI-NEXT: mov d1, v1[1] +; CHECK-GI-NEXT: ushll.8h v0, v0, #0 +; CHECK-GI-NEXT: mov d6, v4[1] +; CHECK-GI-NEXT: ushll.8h v3, v3, #0 +; CHECK-GI-NEXT: mov d5, v0[1] +; CHECK-GI-NEXT: ushll.8h v1, v1, #0 +; CHECK-GI-NEXT: mov d7, v3[1] +; CHECK-GI-NEXT: mov d16, v1[1] +; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 +; CHECK-GI-NEXT: usubl.4s v0, v0, v4 +; CHECK-GI-NEXT: usubl.4s v5, v5, v6 +; CHECK-GI-NEXT: usubl.4s v1, v3, v1 +; CHECK-GI-NEXT: usubl.4s v3, v7, v16 +; CHECK-GI-NEXT: cmgt.4s v4, v2, v0 +; CHECK-GI-NEXT: cmgt.4s v6, v2, v5 +; CHECK-GI-NEXT: cmgt.4s v7, v2, v1 +; CHECK-GI-NEXT: cmgt.4s v2, v2, v3 +; CHECK-GI-NEXT: shl.4s v4, v4, #31 +; CHECK-GI-NEXT: shl.4s v6, v6, #31 +; CHECK-GI-NEXT: shl.4s v7, v7, #31 +; CHECK-GI-NEXT: shl.4s v2, v2, #31 +; CHECK-GI-NEXT: sshr.4s v4, v4, #31 +; CHECK-GI-NEXT: neg.4s v17, v0 +; CHECK-GI-NEXT: sshr.4s v6, v6, #31 +; CHECK-GI-NEXT: neg.4s v16, v5 +; CHECK-GI-NEXT: neg.4s v18, v1 +; CHECK-GI-NEXT: neg.4s v19, v3 
+; CHECK-GI-NEXT: sshr.4s v7, v7, #31 +; CHECK-GI-NEXT: sshr.4s v2, v2, #31 +; CHECK-GI-NEXT: bit.16b v0, v17, v4 +; CHECK-GI-NEXT: mov.16b v4, v6 +; CHECK-GI-NEXT: bsl.16b v4, v16, v5 +; CHECK-GI-NEXT: bit.16b v1, v18, v7 +; CHECK-GI-NEXT: bsl.16b v2, v19, v3 +; CHECK-GI-NEXT: add.4s v0, v0, v4 +; CHECK-GI-NEXT: add.4s v1, v1, v2 +; CHECK-GI-NEXT: add.4s v0, v0, v1 +; CHECK-GI-NEXT: addv.4s s0, v0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %aext = zext <16 x i8> %a to <16 x i32> %bext = zext <16 x i8> %b to <16 x i32> %abdiff = sub nsw <16 x i32> %aext, %bext @@ -299,13 +340,58 @@ } define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: sabd16b_rdx_i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sabdl.8h v2, v0, v1 -; CHECK-NEXT: sabal2.8h v2, v0, v1 -; CHECK-NEXT: uaddlv.8h s0, v2 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sabd16b_rdx_i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sabdl.8h v2, v0, v1 +; CHECK-SD-NEXT: sabal2.8h v2, v0, v1 +; CHECK-SD-NEXT: uaddlv.8h s0, v2 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sabd16b_rdx_i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0[1] +; CHECK-GI-NEXT: sshll.8h v4, v1, #0 +; CHECK-GI-NEXT: mov d1, v1[1] +; CHECK-GI-NEXT: sshll.8h v0, v0, #0 +; CHECK-GI-NEXT: mov d6, v4[1] +; CHECK-GI-NEXT: sshll.8h v3, v3, #0 +; CHECK-GI-NEXT: mov d5, v0[1] +; CHECK-GI-NEXT: sshll.8h v1, v1, #0 +; CHECK-GI-NEXT: mov d7, v3[1] +; CHECK-GI-NEXT: mov d16, v1[1] +; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 +; CHECK-GI-NEXT: ssubl.4s v0, v0, v4 +; CHECK-GI-NEXT: ssubl.4s v5, v5, v6 +; CHECK-GI-NEXT: ssubl.4s v1, v3, v1 +; CHECK-GI-NEXT: ssubl.4s v3, v7, v16 +; CHECK-GI-NEXT: cmgt.4s v4, v2, v0 +; CHECK-GI-NEXT: cmgt.4s v6, v2, v5 +; CHECK-GI-NEXT: cmgt.4s v7, v2, v1 +; CHECK-GI-NEXT: cmgt.4s v2, v2, v3 +; CHECK-GI-NEXT: shl.4s v4, v4, #31 +; CHECK-GI-NEXT: shl.4s v6, v6, #31 +; CHECK-GI-NEXT: shl.4s v7, v7, #31 +; CHECK-GI-NEXT: shl.4s v2, v2, #31 +; 
CHECK-GI-NEXT: sshr.4s v4, v4, #31 +; CHECK-GI-NEXT: neg.4s v17, v0 +; CHECK-GI-NEXT: sshr.4s v6, v6, #31 +; CHECK-GI-NEXT: neg.4s v16, v5 +; CHECK-GI-NEXT: neg.4s v18, v1 +; CHECK-GI-NEXT: neg.4s v19, v3 +; CHECK-GI-NEXT: sshr.4s v7, v7, #31 +; CHECK-GI-NEXT: sshr.4s v2, v2, #31 +; CHECK-GI-NEXT: bit.16b v0, v17, v4 +; CHECK-GI-NEXT: mov.16b v4, v6 +; CHECK-GI-NEXT: bsl.16b v4, v16, v5 +; CHECK-GI-NEXT: bit.16b v1, v18, v7 +; CHECK-GI-NEXT: bsl.16b v2, v19, v3 +; CHECK-GI-NEXT: add.4s v0, v0, v4 +; CHECK-GI-NEXT: add.4s v1, v1, v2 +; CHECK-GI-NEXT: add.4s v0, v0, v1 +; CHECK-GI-NEXT: addv.4s s0, v0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %aext = sext <16 x i8> %a to <16 x i32> %bext = sext <16 x i8> %b to <16 x i32> %abdiff = sub nsw <16 x i32> %aext, %bext @@ -321,14 +407,38 @@ declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) define i32 @uabd8h_rdx(ptr %a, ptr %b) { -; CHECK-LABEL: uabd8h_rdx: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: uabd.8h v0, v0, v1 -; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: uabd8h_rdx: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr q0, [x0] +; CHECK-SD-NEXT: ldr q1, [x1] +; CHECK-SD-NEXT: uabd.8h v0, v0, v1 +; CHECK-SD-NEXT: uaddlv.8h s0, v0 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: uabd8h_rdx: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q1, [x0] +; CHECK-GI-NEXT: ldr q2, [x1] +; CHECK-GI-NEXT: movi.2d v0, #0000000000000000 +; CHECK-GI-NEXT: mov d3, v1[1] +; CHECK-GI-NEXT: mov d4, v2[1] +; CHECK-GI-NEXT: usubl.4s v1, v1, v2 +; CHECK-GI-NEXT: usubl.4s v2, v3, v4 +; CHECK-GI-NEXT: cmgt.4s v3, v0, v1 +; CHECK-GI-NEXT: neg.4s v4, v1 +; CHECK-GI-NEXT: cmgt.4s v0, v0, v2 +; CHECK-GI-NEXT: shl.4s v3, v3, #31 +; CHECK-GI-NEXT: shl.4s v0, v0, #31 +; CHECK-GI-NEXT: neg.4s v5, v2 +; CHECK-GI-NEXT: sshr.4s v3, v3, #31 +; CHECK-GI-NEXT: sshr.4s v0, v0, #31 +; CHECK-GI-NEXT: bit.16b v1, v4, v3 +; 
CHECK-GI-NEXT: bsl.16b v0, v5, v2 +; CHECK-GI-NEXT: add.4s v0, v1, v0 +; CHECK-GI-NEXT: addv.4s s0, v0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %aload = load <8 x i16>, ptr %a, align 1 %bload = load <8 x i16>, ptr %b, align 1 %aext = zext <8 x i16> %aload to <8 x i32> @@ -342,12 +452,34 @@ } define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: sabd8h_rdx: -; CHECK: // %bb.0: -; CHECK-NEXT: sabd.8h v0, v0, v1 -; CHECK-NEXT: uaddlv.8h s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sabd8h_rdx: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sabd.8h v0, v0, v1 +; CHECK-SD-NEXT: uaddlv.8h s0, v0 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sabd8h_rdx: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d3, v0[1] +; CHECK-GI-NEXT: mov d4, v1[1] +; CHECK-GI-NEXT: movi.2d v2, #0000000000000000 +; CHECK-GI-NEXT: ssubl.4s v0, v0, v1 +; CHECK-GI-NEXT: ssubl.4s v1, v3, v4 +; CHECK-GI-NEXT: cmgt.4s v3, v2, v0 +; CHECK-GI-NEXT: neg.4s v4, v0 +; CHECK-GI-NEXT: cmgt.4s v2, v2, v1 +; CHECK-GI-NEXT: shl.4s v3, v3, #31 +; CHECK-GI-NEXT: shl.4s v2, v2, #31 +; CHECK-GI-NEXT: neg.4s v5, v1 +; CHECK-GI-NEXT: sshr.4s v3, v3, #31 +; CHECK-GI-NEXT: sshr.4s v2, v2, #31 +; CHECK-GI-NEXT: bit.16b v0, v4, v3 +; CHECK-GI-NEXT: bit.16b v1, v5, v2 +; CHECK-GI-NEXT: add.4s v0, v0, v1 +; CHECK-GI-NEXT: addv.4s s0, v0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %aext = sext <8 x i16> %a to <8 x i32> %bext = sext <8 x i16> %b to <8 x i32> %abdiff = sub nsw <8 x i32> %aext, %bext diff --git a/llvm/test/CodeGen/AArch64/sext.ll b/llvm/test/CodeGen/AArch64/sext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sext.ll @@ -0,0 +1,1216 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 
-verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for sext_v3i8_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i8_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v3i10_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sext_v16i10_v16i16 + +define i16 @sext_i8_to_i16(i8 %a) { +; CHECK-LABEL: sext_i8_to_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxtb w0, w0 +; CHECK-NEXT: ret +entry: + %c = sext i8 %a to i16 + ret i16 %c +} + +define i32 @sext_i8_to_i32(i8 %a) { +; CHECK-LABEL: sext_i8_to_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxtb w0, w0 +; CHECK-NEXT: ret +entry: + %c = sext i8 %a to i32 + ret i32 %c +} + +define i64 @sext_i8_to_i64(i8 %a) { +; CHECK-LABEL: sext_i8_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtb x0, w0 +; CHECK-NEXT: ret +entry: + %c = sext i8 %a to i64 + ret i64 %c +} + +define i10 @sext_i8_to_i10(i8 %a) { +; CHECK-LABEL: sext_i8_to_i10: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxtb w0, w0 +; CHECK-NEXT: ret +entry: + %c = sext i8 %a to i10 + ret i10 %c +} + +define i32 @sext_i16_to_i32(i16 %a) { +; CHECK-LABEL: sext_i16_to_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sxth w0, w0 +; CHECK-NEXT: ret +entry: + %c = sext i16 %a to i32 + ret i32 %c +} + +define i64 @sext_i16_to_i64(i16 %a) { +; CHECK-LABEL: sext_i16_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxth x0, w0 +; CHECK-NEXT: ret +entry: + %c = sext i16 %a to i64 + ret i64 %c +} + +define i64 @sext_i32_to_i64(i32 %a) { +; CHECK-LABEL: sext_i32_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sxtw x0, w0 +; 
CHECK-NEXT: ret +entry: + %c = sext i32 %a to i64 + ret i64 %c +} + +define i16 @sext_i10_to_i16(i10 %a) { +; CHECK-LABEL: sext_i10_to_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sbfx w0, w0, #0, #10 +; CHECK-NEXT: ret +entry: + %c = sext i10 %a to i16 + ret i16 %c +} + +define i32 @sext_i10_to_i32(i10 %a) { +; CHECK-LABEL: sext_i10_to_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sbfx w0, w0, #0, #10 +; CHECK-NEXT: ret +entry: + %c = sext i10 %a to i32 + ret i32 %c +} + +define i64 @sext_i10_to_i64(i10 %a) { +; CHECK-LABEL: sext_i10_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: sbfx x0, x0, #0, #10 +; CHECK-NEXT: ret +entry: + %c = sext i10 %a to i64 + ret i64 %c +} + +define <2 x i16> @sext_v2i8_v2i16(<2 x i8> %a) { +; CHECK-LABEL: sext_v2i8_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i8> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i32> @sext_v2i8_v2i32(<2 x i8> %a) { +; CHECK-LABEL: sext_v2i8_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i8> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i64> @sext_v2i8_v2i64(<2 x i8> %a) { +; CHECK-LABEL: sext_v2i8_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i8> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i32> @sext_v2i16_v2i32(<2 x i16> %a) { +; CHECK-LABEL: sext_v2i16_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-NEXT: sshr v0.2s, v0.2s, #16 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i16> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i64> @sext_v2i16_v2i64(<2 x i16> %a) { +; CHECK-LABEL: sext_v2i16_v2i64: +; CHECK: // %bb.0: 
// %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #48 +; CHECK-NEXT: sshr v0.2d, v0.2d, #48 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i16> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i64> @sext_v2i32_v2i64(<2 x i32> %a) { +; CHECK-LABEL: sext_v2i32_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i32> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i16> @sext_v2i10_v2i16(<2 x i10> %a) { +; CHECK-LABEL: sext_v2i10_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.2s, v0.2s, #22 +; CHECK-NEXT: sshr v0.2s, v0.2s, #22 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i10> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i32> @sext_v2i10_v2i32(<2 x i10> %a) { +; CHECK-LABEL: sext_v2i10_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.2s, v0.2s, #22 +; CHECK-NEXT: sshr v0.2s, v0.2s, #22 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i10> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i64> @sext_v2i10_v2i64(<2 x i10> %a) { +; CHECK-LABEL: sext_v2i10_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-NEXT: ret +entry: + %c = sext <2 x i10> %a to <2 x i64> + ret <2 x i64> %c +} + +define <3 x i16> @sext_v3i8_v3i16(<3 x i8> %a) { +; CHECK-LABEL: sext_v3i8_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov v0.h[1], w1 +; CHECK-NEXT: mov v0.h[2], w2 +; CHECK-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-NEXT: ret +entry: + %c = sext <3 x i8> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i32> @sext_v3i8_v3i32(<3 x i8> %a) { +; CHECK-LABEL: sext_v3i8_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov v0.h[1], w1 +; CHECK-NEXT: mov v0.h[2], w2 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: sshr v0.4s, 
v0.4s, #24 +; CHECK-NEXT: ret +entry: + %c = sext <3 x i8> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i64> @sext_v3i8_v3i64(<3 x i8> %a) { +; CHECK-SD-LABEL: sext_v3i8_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s0, w2 +; CHECK-SD-NEXT: mov v1.s[1], w1 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: shl v2.2d, v0.2d, #56 +; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #56 +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v3i8_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-GI-NEXT: lsl x8, x2, #56 +; CHECK-GI-NEXT: asr x8, x8, #56 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: fmov d2, x8 +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <3 x i8> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i32> @sext_v3i16_v3i32(<3 x i16> %a) { +; CHECK-SD-LABEL: sext_v3i16_v3i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v3i16_v3i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: sxth w8, w8 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov w8, s2 +; 
CHECK-GI-NEXT: sxth w9, w9 +; CHECK-GI-NEXT: sxth w8, w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: mov v0.s[2], w8 +; CHECK-GI-NEXT: mov v0.s[3], w8 +; CHECK-GI-NEXT: ret +entry: + %c = sext <3 x i16> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i64> @sext_v3i16_v3i64(<3 x i16> %a) { +; CHECK-SD-LABEL: sext_v3i16_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll2 v2.2d, v2.4s, #0 +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v3i16_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: sxth x8, w8 +; CHECK-GI-NEXT: sxth x9, w9 +; CHECK-GI-NEXT: sxth x10, w10 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fmov d1, x9 +; CHECK-GI-NEXT: fmov d2, x10 +; CHECK-GI-NEXT: ret +entry: + %c = sext <3 x i16> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i64> @sext_v3i32_v3i64(<3 x i32> %a) { +; CHECK-SD-LABEL: sext_v3i32_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll v3.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll2 v2.2d, v0.4s, #0 +; CHECK-SD-NEXT: fmov d0, d3 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: ext v1.16b, v3.16b, v3.16b, #8 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v3i32_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: sxtw x8, w8 +; CHECK-GI-NEXT: 
sxtw x9, w9 +; CHECK-GI-NEXT: sxtw x10, w10 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fmov d1, x9 +; CHECK-GI-NEXT: fmov d2, x10 +; CHECK-GI-NEXT: ret +entry: + %c = sext <3 x i32> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i16> @sext_v3i10_v3i16(<3 x i10> %a) { +; CHECK-LABEL: sext_v3i10_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov v0.h[1], w1 +; CHECK-NEXT: mov v0.h[2], w2 +; CHECK-NEXT: shl v0.4h, v0.4h, #6 +; CHECK-NEXT: sshr v0.4h, v0.4h, #6 +; CHECK-NEXT: ret +entry: + %c = sext <3 x i10> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i32> @sext_v3i10_v3i32(<3 x i10> %a) { +; CHECK-LABEL: sext_v3i10_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov v0.h[1], w1 +; CHECK-NEXT: mov v0.h[2], w2 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #22 +; CHECK-NEXT: sshr v0.4s, v0.4s, #22 +; CHECK-NEXT: ret +entry: + %c = sext <3 x i10> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i64> @sext_v3i10_v3i64(<3 x i10> %a) { +; CHECK-SD-LABEL: sext_v3i10_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s0, w2 +; CHECK-SD-NEXT: mov v1.s[1], w1 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: shl v2.2d, v0.2d, #54 +; CHECK-SD-NEXT: ushll v0.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #54 +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v3i10_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-GI-NEXT: lsl x8, x2, 
#54 +; CHECK-GI-NEXT: asr x8, x8, #54 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: fmov d2, x8 +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <3 x i10> %a to <3 x i64> + ret <3 x i64> %c +} + +define <4 x i16> @sext_v4i8_v4i16(<4 x i8> %a) { +; CHECK-LABEL: sext_v4i8_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-NEXT: ret +entry: + %c = sext <4 x i8> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i32> @sext_v4i8_v4i32(<4 x i8> %a) { +; CHECK-LABEL: sext_v4i8_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: ret +entry: + %c = sext <4 x i8> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @sext_v4i8_v4i64(<4 x i8> %a) { +; CHECK-SD-LABEL: sext_v4i8_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-SD-NEXT: shl v2.2d, v1.2d, #56 +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-SD-NEXT: sshr v1.2d, v0.2d, #56 +; CHECK-SD-NEXT: sshr v0.2d, v2.2d, #56 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v4i8_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #56 +; CHECK-GI-NEXT: shl v1.2d, v1.2d, #56 +; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #56 +; CHECK-GI-NEXT: ret +entry: + %c = sext <4 x i8> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i32> @sext_v4i16_v4i32(<4 x i16> %a) { +; CHECK-LABEL: sext_v4i16_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll 
v0.4s, v0.4h, #0 +; CHECK-NEXT: ret +entry: + %c = sext <4 x i16> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @sext_v4i16_v4i64(<4 x i16> %a) { +; CHECK-SD-LABEL: sext_v4i16_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v4i16_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <4 x i16> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i64> @sext_v4i32_v4i64(<4 x i32> %a) { +; CHECK-SD-LABEL: sext_v4i32_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v4i32_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <4 x i32> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i16> @sext_v4i10_v4i16(<4 x i10> %a) { +; CHECK-LABEL: sext_v4i10_v4i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.4h, v0.4h, #6 +; CHECK-NEXT: sshr v0.4h, v0.4h, #6 +; CHECK-NEXT: ret +entry: + %c = sext <4 x i10> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i32> @sext_v4i10_v4i32(<4 x i10> %a) { +; CHECK-LABEL: sext_v4i10_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #22 +; CHECK-NEXT: sshr v0.4s, v0.4s, #22 +; CHECK-NEXT: ret +entry: + %c = sext <4 x i10> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @sext_v4i10_v4i64(<4 x i10> %a) { +; CHECK-SD-LABEL: sext_v4i10_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.2d, 
v0.2s, #0 +; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-SD-NEXT: shl v2.2d, v1.2d, #54 +; CHECK-SD-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-SD-NEXT: sshr v1.2d, v0.2d, #54 +; CHECK-SD-NEXT: sshr v0.2d, v2.2d, #54 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v4i10_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54 +; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #54 +; CHECK-GI-NEXT: ret +entry: + %c = sext <4 x i10> %a to <4 x i64> + ret <4 x i64> %c +} + +define <8 x i16> @sext_v8i8_v8i16(<8 x i8> %a) { +; CHECK-LABEL: sext_v8i8_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: ret +entry: + %c = sext <8 x i8> %a to <8 x i16> + ret <8 x i16> %c +} + +define <8 x i32> @sext_v8i8_v8i32(<8 x i8> %a) { +; CHECK-SD-LABEL: sext_v8i8_v8i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i8_v8i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i8> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i64> @sext_v8i8_v8i64(<8 x i8> %a) { +; CHECK-SD-LABEL: sext_v8i8_v8i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: 
sext_v8i8_v8i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: mov d4, v3.d[1] +; CHECK-GI-NEXT: sshll v1.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v3.2d, v4.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i8> %a to <8 x i64> + ret <8 x i64> %c +} + +define <8 x i32> @sext_v8i16_v8i32(<8 x i16> %a) { +; CHECK-SD-LABEL: sext_v8i16_v8i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i16_v8i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i16> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i64> @sext_v8i16_v8i64(<8 x i16> %a) { +; CHECK-SD-LABEL: sext_v8i16_v8i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i16_v8i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: sshll v3.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: mov d4, v3.d[1] +; CHECK-GI-NEXT: sshll v1.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v3.2d, v4.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i16> %a to <8 x i64> + ret <8 x i64> %c +} + +define <8 x i64> @sext_v8i32_v8i64(<8 x i32> %a) { +; 
CHECK-SD-LABEL: sext_v8i32_v8i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v4.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v1.2s, #0 +; CHECK-SD-NEXT: mov v1.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i32_v8i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov d4, v1.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll v1.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v3.2d, v4.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i32> %a to <8 x i64> + ret <8 x i64> %c +} + +define <8 x i16> @sext_v8i10_v8i16(<8 x i10> %a) { +; CHECK-LABEL: sext_v8i10_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: shl v0.8h, v0.8h, #6 +; CHECK-NEXT: sshr v0.8h, v0.8h, #6 +; CHECK-NEXT: ret +entry: + %c = sext <8 x i10> %a to <8 x i16> + ret <8 x i16> %c +} + +define <8 x i32> @sext_v8i10_v8i32(<8 x i10> %a) { +; CHECK-SD-LABEL: sext_v8i10_v8i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-SD-NEXT: shl v2.4s, v1.4s, #22 +; CHECK-SD-NEXT: shl v0.4s, v0.4s, #22 +; CHECK-SD-NEXT: sshr v1.4s, v0.4s, #22 +; CHECK-SD-NEXT: sshr v0.4s, v2.4s, #22 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i10_v8i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: shl v0.4s, v0.4s, #22 +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshr v0.4s, v0.4s, #22 +; CHECK-GI-NEXT: shl v1.4s, v1.4s, #22 +; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #22 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i10> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i64> @sext_v8i10_v8i64(<8 x i10> %a) { +; CHECK-SD-LABEL: sext_v8i10_v8i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v0.4s, 
v0.8h, #0 +; CHECK-SD-NEXT: ushll v2.2d, v1.2s, #0 +; CHECK-SD-NEXT: ushll v3.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-SD-NEXT: shl v1.2d, v1.2d, #54 +; CHECK-SD-NEXT: shl v2.2d, v2.2d, #54 +; CHECK-SD-NEXT: shl v4.2d, v0.2d, #54 +; CHECK-SD-NEXT: shl v5.2d, v3.2d, #54 +; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #54 +; CHECK-SD-NEXT: sshr v0.2d, v2.2d, #54 +; CHECK-SD-NEXT: sshr v3.2d, v4.2d, #54 +; CHECK-SD-NEXT: sshr v2.2d, v5.2d, #54 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v8i10_v8i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: mov d3, v1.d[1] +; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-GI-NEXT: shl v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: shl v2.2d, v2.2d, #54 +; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: shl v4.2d, v1.2d, #54 +; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54 +; CHECK-GI-NEXT: sshr v1.2d, v2.2d, #54 +; CHECK-GI-NEXT: sshr v2.2d, v4.2d, #54 +; CHECK-GI-NEXT: sshr v3.2d, v3.2d, #54 +; CHECK-GI-NEXT: ret +entry: + %c = sext <8 x i10> %a to <8 x i64> + ret <8 x i64> %c +} + +define <16 x i16> @sext_v16i8_v16i16(<16 x i8> %a) { +; CHECK-SD-LABEL: sext_v16i8_v16i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i8_v16i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i8> %a to <16 x i16> + ret <16 x i16> %c +} + +define <16 x i32> @sext_v16i8_v16i32(<16 x i8> %a) { +; CHECK-SD-LABEL: sext_v16i8_v16i32: +; CHECK-SD: // %bb.0: // %entry 
+; CHECK-SD-NEXT: sshll2 v2.8h, v0.16b, #0 +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v2.8h, #0 +; CHECK-SD-NEXT: sshll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i8_v16i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: sshll v3.8h, v1.8b, #0 +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d4, v3.d[1] +; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll v3.4s, v4.4h, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i8> %a to <16 x i32> + ret <16 x i32> %c +} + +define <16 x i64> @sext_v16i8_v16i64(<16 x i8> %a) { +; CHECK-SD-LABEL: sext_v16i8_v16i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll2 v2.4s, v1.8h, #0 +; CHECK-SD-NEXT: sshll2 v4.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v16.4s, v1.4h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v16.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v4.2s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v16.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i8_v16i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: sshll v2.8h, v2.8b, #0 +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov d5, v2.d[1] +; CHECK-GI-NEXT: sshll v4.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v6.4s, v2.4h, #0 +; CHECK-GI-NEXT: sshll v1.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll 
v16.4s, v5.4h, #0 +; CHECK-GI-NEXT: mov d3, v4.d[1] +; CHECK-GI-NEXT: mov d7, v6.d[1] +; CHECK-GI-NEXT: mov d17, v16.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v4.2s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v6.2s, #0 +; CHECK-GI-NEXT: sshll v3.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v5.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v16.2s, #0 +; CHECK-GI-NEXT: sshll v7.2d, v17.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i8> %a to <16 x i64> + ret <16 x i64> %c +} + +define <16 x i32> @sext_v16i16_v16i32(<16 x i16> %a) { +; CHECK-SD-LABEL: sext_v16i16_v16i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v4.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll2 v3.4s, v1.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-SD-NEXT: mov v1.16b, v4.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i16_v16i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov d4, v1.d[1] +; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-GI-NEXT: sshll v1.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll v3.4s, v4.4h, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i16> %a to <16 x i32> + ret <16 x i32> %c +} + +define <16 x i64> @sext_v16i16_v16i64(<16 x i16> %a) { +; CHECK-SD-LABEL: sext_v16i16_v16i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: sshll2 v4.4s, v1.8h, #0 +; CHECK-SD-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-SD-NEXT: sshll2 v16.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v4.4s, #0 +; CHECK-SD-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v4.2s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v1.2s, #0 +; CHECK-SD-NEXT: mov v1.16b, v16.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i16_v16i64: +; 
CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d3, v0.d[1] +; CHECK-GI-NEXT: mov d4, v1.d[1] +; CHECK-GI-NEXT: sshll v2.4s, v0.4h, #0 +; CHECK-GI-NEXT: sshll v5.4s, v1.4h, #0 +; CHECK-GI-NEXT: mov d1, v2.d[1] +; CHECK-GI-NEXT: sshll v0.2d, v2.2s, #0 +; CHECK-GI-NEXT: mov d6, v5.d[1] +; CHECK-GI-NEXT: sshll v2.4s, v3.4h, #0 +; CHECK-GI-NEXT: sshll v3.4s, v4.4h, #0 +; CHECK-GI-NEXT: mov d7, v2.d[1] +; CHECK-GI-NEXT: mov d16, v3.d[1] +; CHECK-GI-NEXT: sshll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-GI-NEXT: sshll v4.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll v5.2d, v6.2s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v3.2d, v7.2s, #0 +; CHECK-GI-NEXT: sshll v7.2d, v16.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i16> %a to <16 x i64> + ret <16 x i64> %c +} + +define <16 x i64> @sext_v16i32_v16i64(<16 x i32> %a) { +; CHECK-SD-LABEL: sext_v16i32_v16i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: sshll2 v17.2d, v0.4s, #0 +; CHECK-SD-NEXT: sshll2 v18.2d, v1.4s, #0 +; CHECK-SD-NEXT: sshll v16.2d, v1.2s, #0 +; CHECK-SD-NEXT: sshll2 v5.2d, v2.4s, #0 +; CHECK-SD-NEXT: sshll2 v7.2d, v3.4s, #0 +; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: sshll v4.2d, v2.2s, #0 +; CHECK-SD-NEXT: sshll v6.2d, v3.2s, #0 +; CHECK-SD-NEXT: mov v1.16b, v17.16b +; CHECK-SD-NEXT: mov v2.16b, v16.16b +; CHECK-SD-NEXT: mov v3.16b, v18.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i32_v16i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d5, v1.d[1] +; CHECK-GI-NEXT: mov d6, v2.d[1] +; CHECK-GI-NEXT: sshll v16.2d, v0.2s, #0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: sshll v4.2d, v2.2s, #0 +; CHECK-GI-NEXT: mov d2, v3.d[1] +; CHECK-GI-NEXT: sshll v17.2d, v1.2s, #0 +; CHECK-GI-NEXT: sshll v18.2d, v5.2s, #0 +; CHECK-GI-NEXT: sshll v1.2d, v0.2s, #0 +; CHECK-GI-NEXT: sshll v5.2d, v6.2s, #0 +; CHECK-GI-NEXT: sshll v6.2d, v3.2s, #0 +; CHECK-GI-NEXT: sshll v7.2d, v2.2s, #0 +; CHECK-GI-NEXT: mov v0.16b, 
v16.16b +; CHECK-GI-NEXT: mov v2.16b, v17.16b +; CHECK-GI-NEXT: mov v3.16b, v18.16b +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i32> %a to <16 x i64> + ret <16 x i64> %c +} + +define <16 x i16> @sext_v16i10_v16i16(<16 x i10> %a) { +; CHECK-LABEL: sext_v16i10_v16i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr w8, [sp] +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldr w8, [sp, #8] +; CHECK-NEXT: mov v1.h[1], w1 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: mov v1.h[2], w2 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov v1.h[3], w3 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: mov v1.h[4], w4 +; CHECK-NEXT: mov v0.h[4], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov v1.h[5], w5 +; CHECK-NEXT: mov v0.h[5], w8 +; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: mov v1.h[6], w6 +; CHECK-NEXT: mov v0.h[6], w9 +; CHECK-NEXT: mov v1.h[7], w7 +; CHECK-NEXT: mov v0.h[7], w8 +; CHECK-NEXT: shl v1.8h, v1.8h, #6 +; CHECK-NEXT: shl v2.8h, v0.8h, #6 +; CHECK-NEXT: sshr v0.8h, v1.8h, #6 +; CHECK-NEXT: sshr v1.8h, v2.8h, #6 +; CHECK-NEXT: ret +entry: + %c = sext <16 x i10> %a to <16 x i16> + ret <16 x i16> %c +} + +define <16 x i32> @sext_v16i10_v16i32(<16 x i10> %a) { +; CHECK-SD-LABEL: sext_v16i10_v16i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldr w11, [sp, #32] +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: ldr w12, [sp] +; CHECK-SD-NEXT: fmov s1, w4 +; CHECK-SD-NEXT: ldr w10, [sp, #40] +; CHECK-SD-NEXT: ldr w15, [sp, #8] +; CHECK-SD-NEXT: fmov s3, w11 +; CHECK-SD-NEXT: fmov s2, w12 +; CHECK-SD-NEXT: ldr w9, [sp, #48] +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: ldr w14, [sp, #16] +; CHECK-SD-NEXT: mov v1.h[1], w5 +; CHECK-SD-NEXT: ldr w8, [sp, #56] +; CHECK-SD-NEXT: mov v2.h[1], w15 +; CHECK-SD-NEXT: ldr w13, [sp, #24] +; CHECK-SD-NEXT: mov v3.h[1], w10 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; 
CHECK-SD-NEXT: mov v1.h[2], w6 +; CHECK-SD-NEXT: mov v2.h[2], w14 +; CHECK-SD-NEXT: mov v3.h[2], w9 +; CHECK-SD-NEXT: mov v0.h[3], w3 +; CHECK-SD-NEXT: mov v1.h[3], w7 +; CHECK-SD-NEXT: mov v2.h[3], w13 +; CHECK-SD-NEXT: mov v3.h[3], w8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-SD-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-SD-NEXT: shl v0.4s, v0.4s, #22 +; CHECK-SD-NEXT: shl v1.4s, v1.4s, #22 +; CHECK-SD-NEXT: shl v2.4s, v2.4s, #22 +; CHECK-SD-NEXT: shl v3.4s, v3.4s, #22 +; CHECK-SD-NEXT: sshr v0.4s, v0.4s, #22 +; CHECK-SD-NEXT: sshr v1.4s, v1.4s, #22 +; CHECK-SD-NEXT: sshr v2.4s, v2.4s, #22 +; CHECK-SD-NEXT: sshr v3.4s, v3.4s, #22 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i10_v16i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldr s0, [sp] +; CHECK-GI-NEXT: fmov s7, w0 +; CHECK-GI-NEXT: ldr s1, [sp, #8] +; CHECK-GI-NEXT: fmov s17, w4 +; CHECK-GI-NEXT: ldr s4, [sp, #32] +; CHECK-GI-NEXT: ldr s5, [sp, #40] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: ldr s2, [sp, #16] +; CHECK-GI-NEXT: mov v7.s[1], w1 +; CHECK-GI-NEXT: ldr s6, [sp, #48] +; CHECK-GI-NEXT: mov v17.s[1], w5 +; CHECK-GI-NEXT: ldr s3, [sp, #24] +; CHECK-GI-NEXT: mov v4.s[1], v5.s[0] +; CHECK-GI-NEXT: ldr s16, [sp, #56] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v7.s[2], w2 +; CHECK-GI-NEXT: mov v17.s[2], w6 +; CHECK-GI-NEXT: mov v4.s[2], v6.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v3.s[0] +; CHECK-GI-NEXT: mov v7.s[3], w3 +; CHECK-GI-NEXT: mov v17.s[3], w7 +; CHECK-GI-NEXT: mov v4.s[3], v16.s[0] +; CHECK-GI-NEXT: shl v3.4s, v0.4s, #22 +; CHECK-GI-NEXT: shl v1.4s, v7.4s, #22 +; CHECK-GI-NEXT: shl v2.4s, v17.4s, #22 +; CHECK-GI-NEXT: shl v4.4s, v4.4s, #22 +; CHECK-GI-NEXT: sshr v0.4s, v1.4s, #22 +; CHECK-GI-NEXT: sshr v1.4s, v2.4s, #22 +; CHECK-GI-NEXT: sshr v2.4s, v3.4s, #22 +; CHECK-GI-NEXT: sshr v3.4s, v4.4s, #22 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i10> %a to <16 x 
i32> + ret <16 x i32> %c +} + +define <16 x i64> @sext_v16i10_v16i64(<16 x i10> %a) { +; CHECK-SD-LABEL: sext_v16i10_v16i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ldr s0, [sp] +; CHECK-SD-NEXT: add x8, sp, #8 +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s2, w2 +; CHECK-SD-NEXT: fmov s3, w4 +; CHECK-SD-NEXT: fmov s4, w6 +; CHECK-SD-NEXT: ld1 { v0.s }[1], [x8] +; CHECK-SD-NEXT: add x8, sp, #24 +; CHECK-SD-NEXT: ldr s5, [sp, #16] +; CHECK-SD-NEXT: add x9, sp, #40 +; CHECK-SD-NEXT: ldr s6, [sp, #32] +; CHECK-SD-NEXT: add x10, sp, #56 +; CHECK-SD-NEXT: ldr s7, [sp, #48] +; CHECK-SD-NEXT: mov v1.s[1], w1 +; CHECK-SD-NEXT: ld1 { v5.s }[1], [x8] +; CHECK-SD-NEXT: mov v2.s[1], w3 +; CHECK-SD-NEXT: ld1 { v6.s }[1], [x9] +; CHECK-SD-NEXT: mov v3.s[1], w5 +; CHECK-SD-NEXT: ld1 { v7.s }[1], [x10] +; CHECK-SD-NEXT: mov v4.s[1], w7 +; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0 +; CHECK-SD-NEXT: ushll v16.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0 +; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-SD-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-SD-NEXT: shl v0.2d, v1.2d, #54 +; CHECK-SD-NEXT: shl v1.2d, v2.2d, #54 +; CHECK-SD-NEXT: shl v2.2d, v3.2d, #54 +; CHECK-SD-NEXT: shl v3.2d, v4.2d, #54 +; CHECK-SD-NEXT: shl v4.2d, v16.2d, #54 +; CHECK-SD-NEXT: shl v5.2d, v5.2d, #54 +; CHECK-SD-NEXT: shl v6.2d, v6.2d, #54 +; CHECK-SD-NEXT: shl v7.2d, v7.2d, #54 +; CHECK-SD-NEXT: sshr v0.2d, v0.2d, #54 +; CHECK-SD-NEXT: sshr v1.2d, v1.2d, #54 +; CHECK-SD-NEXT: sshr v2.2d, v2.2d, #54 +; CHECK-SD-NEXT: sshr v3.2d, v3.2d, #54 +; CHECK-SD-NEXT: sshr v4.2d, v4.2d, #54 +; CHECK-SD-NEXT: sshr v5.2d, v5.2d, #54 +; CHECK-SD-NEXT: sshr v6.2d, v6.2d, #54 +; CHECK-SD-NEXT: sshr v7.2d, v7.2d, #54 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sext_v16i10_v16i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ldr s0, [sp] +; CHECK-GI-NEXT: fmov s6, w0 +; 
CHECK-GI-NEXT: ldr s1, [sp, #8] +; CHECK-GI-NEXT: fmov s16, w2 +; CHECK-GI-NEXT: ldr s2, [sp, #16] +; CHECK-GI-NEXT: fmov s18, w4 +; CHECK-GI-NEXT: ldr s3, [sp, #24] +; CHECK-GI-NEXT: fmov s19, w6 +; CHECK-GI-NEXT: ldr s4, [sp, #32] +; CHECK-GI-NEXT: ldr s5, [sp, #40] +; CHECK-GI-NEXT: ldr s7, [sp, #48] +; CHECK-GI-NEXT: ldr s17, [sp, #56] +; CHECK-GI-NEXT: mov v6.s[1], w1 +; CHECK-GI-NEXT: mov v16.s[1], w3 +; CHECK-GI-NEXT: mov v18.s[1], w5 +; CHECK-GI-NEXT: mov v19.s[1], w7 +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v2.s[1], v3.s[0] +; CHECK-GI-NEXT: mov v4.s[1], v5.s[0] +; CHECK-GI-NEXT: mov v7.s[1], v17.s[0] +; CHECK-GI-NEXT: ushll v1.2d, v6.2s, #0 +; CHECK-GI-NEXT: ushll v3.2d, v16.2s, #0 +; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0 +; CHECK-GI-NEXT: ushll v6.2d, v19.2s, #0 +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0 +; CHECK-GI-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-GI-NEXT: shl v1.2d, v1.2d, #54 +; CHECK-GI-NEXT: shl v3.2d, v3.2d, #54 +; CHECK-GI-NEXT: shl v5.2d, v5.2d, #54 +; CHECK-GI-NEXT: shl v6.2d, v6.2d, #54 +; CHECK-GI-NEXT: shl v16.2d, v0.2d, #54 +; CHECK-GI-NEXT: shl v17.2d, v2.2d, #54 +; CHECK-GI-NEXT: shl v18.2d, v4.2d, #54 +; CHECK-GI-NEXT: shl v7.2d, v7.2d, #54 +; CHECK-GI-NEXT: sshr v0.2d, v1.2d, #54 +; CHECK-GI-NEXT: sshr v1.2d, v3.2d, #54 +; CHECK-GI-NEXT: sshr v2.2d, v5.2d, #54 +; CHECK-GI-NEXT: sshr v3.2d, v6.2d, #54 +; CHECK-GI-NEXT: sshr v4.2d, v16.2d, #54 +; CHECK-GI-NEXT: sshr v5.2d, v17.2d, #54 +; CHECK-GI-NEXT: sshr v6.2d, v18.2d, #54 +; CHECK-GI-NEXT: sshr v7.2d, v7.2d, #54 +; CHECK-GI-NEXT: ret +entry: + %c = sext <16 x i10> %a to <16 x i64> + ret <16 x i64> %c +} diff --git a/llvm/test/CodeGen/AArch64/zext.ll b/llvm/test/CodeGen/AArch64/zext.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/zext.ll @@ -0,0 +1,1345 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; 
RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for zext_v16i10_v16i16 + +define i16 @zext_i8_to_i16(i8 %a) { +; CHECK-LABEL: zext_i8_to_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w0, w0, #0xff +; CHECK-NEXT: ret +entry: + %c = zext i8 %a to i16 + ret i16 %c +} + +define i32 @zext_i8_to_i32(i8 %a) { +; CHECK-LABEL: zext_i8_to_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w0, w0, #0xff +; CHECK-NEXT: ret +entry: + %c = zext i8 %a to i32 + ret i32 %c +} + +define i64 @zext_i8_to_i64(i8 %a) { +; CHECK-LABEL: zext_i8_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: and x0, x0, #0xff +; CHECK-NEXT: ret +entry: + %c = zext i8 %a to i64 + ret i64 %c +} + +define i10 @zext_i8_to_i10(i8 %a) { +; CHECK-LABEL: zext_i8_to_i10: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w0, w0, #0xff +; CHECK-NEXT: ret +entry: + %c = zext i8 %a to i10 + ret i10 %c +} + +define i32 @zext_i16_to_i32(i16 %a) { +; CHECK-LABEL: zext_i16_to_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w0, w0, #0xffff +; CHECK-NEXT: ret +entry: + %c = zext i16 %a to i32 + ret i32 %c +} + +define i64 @zext_i16_to_i64(i16 %a) { +; CHECK-LABEL: zext_i16_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: and x0, x0, #0xffff +; CHECK-NEXT: ret +entry: + %c = zext i16 %a to i64 + ret i64 %c +} + +define i64 @zext_i32_to_i64(i32 %a) { +; CHECK-LABEL: zext_i32_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w0, w0 +; CHECK-NEXT: ret +entry: + %c = zext i32 %a to i64 + ret i64 %c +} + +define i16 @zext_i10_to_i16(i10 %a) { +; CHECK-LABEL: zext_i10_to_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w0, w0, 
#0x3ff +; CHECK-NEXT: ret +entry: + %c = zext i10 %a to i16 + ret i16 %c +} + +define i32 @zext_i10_to_i32(i10 %a) { +; CHECK-LABEL: zext_i10_to_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: and w0, w0, #0x3ff +; CHECK-NEXT: ret +entry: + %c = zext i10 %a to i32 + ret i32 %c +} + +define i64 @zext_i10_to_i64(i10 %a) { +; CHECK-LABEL: zext_i10_to_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: and x0, x0, #0x3ff +; CHECK-NEXT: ret +entry: + %c = zext i10 %a to i64 + ret i64 %c +} + +define <2 x i16> @zext_v2i8_v2i16(<2 x i8> %a) { +; CHECK-SD-LABEL: zext_v2i8_v2i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i8_v2i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI10_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI10_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i8> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i32> @zext_v2i8_v2i32(<2 x i8> %a) { +; CHECK-SD-LABEL: zext_v2i8_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i8_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI11_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI11_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i8> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i64> @zext_v2i8_v2i64(<2 x i8> %a) { +; CHECK-SD-LABEL: zext_v2i8_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i8_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI12_0 +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i8> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i32> @zext_v2i16_v2i32(<2 x i16> %a) { +; CHECK-SD-LABEL: zext_v2i16_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i16_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI13_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI13_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i16> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i64> @zext_v2i16_v2i64(<2 x i16> %a) { +; CHECK-SD-LABEL: zext_v2i16_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i16_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI14_0 +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i16> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i64> @zext_v2i32_v2i64(<2 x i32> %a) { +; CHECK-LABEL: zext_v2i32_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-NEXT: ret +entry: + %c = zext <2 x i32> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i16> @zext_v2i10_v2i16(<2 x i10> %a) { +; CHECK-SD-LABEL: zext_v2i10_v2i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi v1.2s, #3, msl #8 +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i10_v2i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI16_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI16_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; 
CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i10> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i32> @zext_v2i10_v2i32(<2 x i10> %a) { +; CHECK-SD-LABEL: zext_v2i10_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi v1.2s, #3, msl #8 +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i10_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI17_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI17_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i10> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i64> @zext_v2i10_v2i64(<2 x i10> %a) { +; CHECK-SD-LABEL: zext_v2i10_v2i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: movi v1.2s, #3, msl #8 +; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v2i10_v2i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI18_0 +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <2 x i10> %a to <2 x i64> + ret <2 x i64> %c +} + +define <3 x i16> @zext_v3i8_v3i16(<3 x i8> %a) { +; CHECK-SD-LABEL: zext_v3i8_v3i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i8_v3i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: fmov s2, w1 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NEXT: fmov s2, w2 +; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: mov v3.h[1], v0.h[0] +; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] +; CHECK-GI-NEXT: mov v3.h[2], v0.h[0] +; CHECK-GI-NEXT: mov 
v1.h[3], v0.h[0] +; CHECK-GI-NEXT: mov v3.h[3], v0.h[0] +; CHECK-GI-NEXT: and v0.8b, v1.8b, v3.8b +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i8> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i32> @zext_v3i8_v3i32(<3 x i8> %a) { +; CHECK-SD-LABEL: zext_v3i8_v3i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i8_v3i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v0.s[1], w1 +; CHECK-GI-NEXT: mov v1.s[1], w8 +; CHECK-GI-NEXT: mov v0.s[2], w2 +; CHECK-GI-NEXT: mov v1.s[2], w8 +; CHECK-GI-NEXT: mov v0.s[3], w8 +; CHECK-GI-NEXT: mov v1.s[3], w8 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i8> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i64> @zext_v3i8_v3i64(<3 x i8> %a) { +; CHECK-SD-LABEL: zext_v3i8_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s3, w2 +; CHECK-SD-NEXT: movi v0.2d, #0x000000000000ff +; CHECK-SD-NEXT: movi v2.2d, #0000000000000000 +; CHECK-SD-NEXT: mov v1.s[1], w1 +; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-SD-NEXT: mov v2.b[0], v3.b[0] +; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; 
CHECK-SD-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i8_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: adrp x8, .LCPI21_0 +; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] +; CHECK-GI-NEXT: and x8, x2, #0xff +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: fmov d2, x8 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i8> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i32> @zext_v3i16_v3i32(<3 x i16> %a) { +; CHECK-SD-LABEL: zext_v3i16_v3i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i16_v3i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: uxth w8, w8 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov w8, s2 +; CHECK-GI-NEXT: uxth w9, w9 +; CHECK-GI-NEXT: uxth w8, w8 +; CHECK-GI-NEXT: mov v0.s[1], w9 +; CHECK-GI-NEXT: mov v0.s[2], w8 +; CHECK-GI-NEXT: mov v0.s[3], w8 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i16> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i64> @zext_v3i16_v3i64(<3 x i16> %a) { +; CHECK-SD-LABEL: zext_v3i16_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v0.2d, v2.2s, #0 +; CHECK-SD-NEXT: ushll2 v2.2d, v2.4s, #0 +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: 
// kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i16_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: ubfx x8, x8, #0, #16 +; CHECK-GI-NEXT: ubfx x9, x9, #0, #16 +; CHECK-GI-NEXT: ubfx x10, x10, #0, #16 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fmov d1, x9 +; CHECK-GI-NEXT: fmov d2, x10 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i16> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i64> @zext_v3i32_v3i64(<3 x i32> %a) { +; CHECK-SD-LABEL: zext_v3i32_v3i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v3.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll2 v2.2d, v0.4s, #0 +; CHECK-SD-NEXT: fmov d0, d3 +; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-SD-NEXT: ext v1.16b, v3.16b, v3.16b, #8 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i32_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: fmov w9, s1 +; CHECK-GI-NEXT: fmov w10, s2 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fmov d1, x9 +; CHECK-GI-NEXT: fmov d2, x10 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i32> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i16> @zext_v3i10_v3i16(<3 x i10> %a) { +; CHECK-SD-LABEL: zext_v3i10_v3i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i10_v3i16: +; CHECK-GI: // 
%bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #1023 // =0x3ff +; CHECK-GI-NEXT: fmov s1, w0 +; CHECK-GI-NEXT: fmov s2, w1 +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] +; CHECK-GI-NEXT: fmov s2, w2 +; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: mov v3.h[1], v0.h[0] +; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] +; CHECK-GI-NEXT: mov v3.h[2], v0.h[0] +; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] +; CHECK-GI-NEXT: mov v3.h[3], v0.h[0] +; CHECK-GI-NEXT: and v0.8b, v1.8b, v3.8b +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: mov h2, v0.h[2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v0.h[2], v2.h[0] +; CHECK-GI-NEXT: mov v0.h[3], v0.h[0] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i10> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i32> @zext_v3i10_v3i32(<3 x i10> %a) { +; CHECK-SD-LABEL: zext_v3i10_v3i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: movi v1.4s, #3, msl #8 +; CHECK-SD-NEXT: mov v0.h[1], w1 +; CHECK-SD-NEXT: mov v0.h[2], w2 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i10_v3i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov w8, #1023 // =0x3ff +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w8 +; CHECK-GI-NEXT: mov v0.s[1], w1 +; CHECK-GI-NEXT: mov v1.s[1], w8 +; CHECK-GI-NEXT: mov v0.s[2], w2 +; CHECK-GI-NEXT: mov v1.s[2], w8 +; CHECK-GI-NEXT: mov v0.s[3], w8 +; CHECK-GI-NEXT: mov v1.s[3], w8 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: mov s1, v0.s[1] +; CHECK-GI-NEXT: mov s2, v0.s[2] +; CHECK-GI-NEXT: mov v0.s[1], v1.s[0] +; CHECK-GI-NEXT: mov v0.s[2], v2.s[0] +; CHECK-GI-NEXT: mov v0.s[3], v0.s[0] +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i10> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i64> @zext_v3i10_v3i64(<3 x i10> %a) { +; CHECK-SD-LABEL: zext_v3i10_v3i64: +; CHECK-SD: 
// %bb.0: // %entry +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: mov w8, #1023 // =0x3ff +; CHECK-SD-NEXT: fmov s3, w2 +; CHECK-SD-NEXT: mov v0.s[1], w1 +; CHECK-SD-NEXT: dup v2.2d, x8 +; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: and v2.8b, v3.8b, v2.8b +; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v3i10_v3i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: adrp x8, .LCPI27_0 +; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-GI-NEXT: // kill: def $w2 killed $w2 def $x2 +; CHECK-GI-NEXT: mov v0.d[1], x1 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI27_0] +; CHECK-GI-NEXT: and x8, x2, #0x3ff +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: fmov d2, x8 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <3 x i10> %a to <3 x i64> + ret <3 x i64> %c +} + +define <4 x i16> @zext_v4i8_v4i16(<4 x i8> %a) { +; CHECK-SD-LABEL: zext_v4i8_v4i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i8_v4i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI28_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI28_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i8> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i32> @zext_v4i8_v4i32(<4 x i8> %a) { +; CHECK-SD-LABEL: zext_v4i8_v4i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i8_v4i32: +; CHECK-GI: // %bb.0: // %entry +; 
CHECK-GI-NEXT: adrp x8, .LCPI29_0 +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI29_0] +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i8> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @zext_v4i8_v4i64(<4 x i8> %a) { +; CHECK-SD-LABEL: zext_v4i8_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i8_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: adrp x8, .LCPI30_0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI30_0] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i8> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i32> @zext_v4i16_v4i32(<4 x i16> %a) { +; CHECK-LABEL: zext_v4i16_v4i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ret +entry: + %c = zext <4 x i16> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @zext_v4i16_v4i64(<4 x i16> %a) { +; CHECK-SD-LABEL: zext_v4i16_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i16_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i16> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i64> @zext_v4i32_v4i64(<4 x i32> %a) { +; CHECK-SD-LABEL: zext_v4i32_v4i64: +; CHECK-SD: // 
%bb.0: // %entry +; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i32_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i32> %a to <4 x i64> + ret <4 x i64> %c +} + +define <4 x i16> @zext_v4i10_v4i16(<4 x i10> %a) { +; CHECK-SD-LABEL: zext_v4i10_v4i16: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i10_v4i16: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i10> %a to <4 x i16> + ret <4 x i16> %c +} + +define <4 x i32> @zext_v4i10_v4i32(<4 x i10> %a) { +; CHECK-SD-LABEL: zext_v4i10_v4i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i10_v4i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i10> %a to <4 x i32> + ret <4 x i32> %c +} + +define <4 x i64> @zext_v4i10_v4i64(<4 x i10> %a) { +; CHECK-SD-LABEL: zext_v4i10_v4i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: bic v0.4h, #252, lsl #8 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v4i10_v4i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: adrp x8, .LCPI36_0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] +; 
CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b +; CHECK-GI-NEXT: ret +entry: + %c = zext <4 x i10> %a to <4 x i64> + ret <4 x i64> %c +} + +define <8 x i16> @zext_v8i8_v8i16(<8 x i8> %a) { +; CHECK-LABEL: zext_v8i8_v8i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ret +entry: + %c = zext <8 x i8> %a to <8 x i16> + ret <8 x i16> %c +} + +define <8 x i32> @zext_v8i8_v8i32(<8 x i8> %a) { +; CHECK-SD-LABEL: zext_v8i8_v8i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v8i8_v8i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-GI-NEXT: ret +entry: + %c = zext <8 x i8> %a to <8 x i32> + ret <8 x i32> %c +} + +define <8 x i64> @zext_v8i8_v8i64(<8 x i8> %a) { +; CHECK-SD-LABEL: zext_v8i8_v8i64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0 +; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: zext_v8i8_v8i64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-GI-NEXT: mov d1, v0.d[1] +; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-GI-NEXT: mov d2, v0.d[1] +; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0 +; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-GI-NEXT: mov d4, v3.d[1] +; CHECK-GI-NEXT: ushll v1.2d, v2.2s, #0 +; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-GI-NEXT: ushll v3.2d, v4.2s, #0 +; CHECK-GI-NEXT: ret 
+entry:
+  %c = zext <8 x i8> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+; Single doubling step, <8 x i16> -> <8 x i32>; the result occupies v0 and v1.
+; The -SD checks widen the high half with ushll2, while the -GI checks first
+; move it out with "mov d1, v0.d[1]" and then use a plain ushll.
+define <8 x i32> @zext_v8i16_v8i32(<8 x i16> %a) {
+; CHECK-SD-LABEL: zext_v8i16_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i16_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+; Fourfold widening, <8 x i16> -> <8 x i64>. Both check sets go through an
+; intermediate .4s stage (i16 -> i32 -> i64) before producing the four .2d
+; results in v0-v3.
+define <8 x i64> @zext_v8i16_v8i64(<8 x i16> %a) {
+; CHECK-SD-LABEL: zext_v8i16_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i16_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v3.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <8 x i16> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+; Single doubling step on a two-register input (v0, v1): each <4 x i32> half
+; is widened into two .2d registers, giving results in v0-v3.
+define <8 x i64> @zext_v8i32_v8i64(<8 x i32> %a) {
+; CHECK-SD-LABEL: zext_v8i32_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v4.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i32_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v4.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <8 x i32> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+; Non-power-of-two source element (i10). The checks use the argument directly
+; as 16-bit lanes, so only the upper six bits of each lane are cleared: a bic
+; with an immediate in the -SD checks, an and against a constant-pool load in
+; the -GI checks.
+define <8 x i16> @zext_v8i10_v8i16(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI43_0
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI43_0]
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <8 x i10> %a to <8 x i16>
+  ret <8 x i16> %c
+}
+
+; i10 -> i32: the -SD checks mask in the 16-bit lanes first and then widen;
+; the -GI checks widen first and mask both .4s halves afterwards.
+define <8 x i32> @zext_v8i10_v8i32(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI44_0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <8 x i10> %a to <8 x i32>
+  ret <8 x i32> %c
+}
+
+; i10 -> i64: the -SD checks mask once in the 16-bit lanes and then widen
+; twice; the -GI checks widen twice and then mask each of the four .2d results.
+define <8 x i64> @zext_v8i10_v8i64(<8 x i10> %a) {
+; CHECK-SD-LABEL: zext_v8i10_v8i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v8i10_v8i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI45_0]
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v1.2s, #0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-GI-NEXT: and v1.16b, v2.16b, v3.16b
+; CHECK-GI-NEXT: and v2.16b, v5.16b, v3.16b
+; CHECK-GI-NEXT: and v3.16b, v4.16b, v3.16b
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <8 x i10> %a to <8 x i64>
+  ret <8 x i64> %c
+}
+
+; Single doubling step on byte lanes, <16 x i8> -> <16 x i16>; the result
+; occupies v0 and v1. The -SD checks use ushll2 for the high half, the -GI
+; checks use "mov d1, v0.d[1]" followed by a plain ushll.
+define <16 x i16> @zext_v16i8_v16i16(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i8> %a to <16 x i16>
+  ret <16 x i16> %c
+}
+
+; Fourfold widening, <16 x i8> -> <16 x i32>, via an intermediate .8h stage;
+; the four .4s results end up in v0-v3.
+define <16 x i32> @zext_v16i8_v16i32(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v2.8h, v0.16b, #0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll2 v3.4s, v2.8h, #0
+; CHECK-SD-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov d4, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i8> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+; Eightfold widening, <16 x i8> -> <16 x i64>, in three doubling stages
+; (.8h, .4s, .2d); the eight .2d results fill v0-v7.
+define <16 x i64> @zext_v16i8_v16i64(<16 x i8> %a) {
+; CHECK-SD-LABEL: zext_v16i8_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: ushll2 v2.4s, v1.8h, #0
+; CHECK-SD-NEXT: ushll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v16.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v4.4s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v16.4s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll2 v1.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v16.2s, #0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i8_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: ushll v1.8h, v0.8b, #0
+; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.8h, v2.8b, #0
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d5, v2.d[1]
+; CHECK-GI-NEXT: ushll v4.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v6.4s, v2.4h, #0
+; CHECK-GI-NEXT: ushll v1.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v16.4s, v5.4h, #0
+; CHECK-GI-NEXT: mov d3, v4.d[1]
+; CHECK-GI-NEXT: mov d7, v6.d[1]
+; CHECK-GI-NEXT: mov d17, v16.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v4.2s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v16.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v17.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i8> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+; Single doubling step on a two-register input (v0, v1), <16 x i16> ->
+; <16 x i32>; the four .4s results end up in v0-v3.
+define <16 x i32> @zext_v16i16_v16i32(<16 x i16> %a) {
+; CHECK-SD-LABEL: zext_v16i16_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v4.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll2 v3.4s, v1.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-SD-NEXT: mov v1.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i16_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v1.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i16> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+; Fourfold widening of a two-register input, <16 x i16> -> <16 x i64>, via an
+; intermediate .4s stage; the eight .2d results fill v0-v7.
+define <16 x i64> @zext_v16i16_v16i64(<16 x i16> %a) {
+; CHECK-SD-LABEL: zext_v16i16_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll2 v4.4s, v1.8h, #0
+; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll2 v16.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll2 v3.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v4.4s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v1.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v16.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i16_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d3, v0.d[1]
+; CHECK-GI-NEXT: mov d4, v1.d[1]
+; CHECK-GI-NEXT: ushll v2.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov d1, v2.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov d6, v5.d[1]
+; CHECK-GI-NEXT: ushll v2.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v4.4h, #0
+; CHECK-GI-NEXT: mov d7, v2.d[1]
+; CHECK-GI-NEXT: mov d16, v3.d[1]
+; CHECK-GI-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v4.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v7.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v16.2s, #0
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i16> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+; Single doubling step on a four-register input (v0-v3), <16 x i32> ->
+; <16 x i64>; the eight .2d results fill v0-v7. Both check sets finish with
+; "mov vN.16b" copies that move some results into their final registers.
+define <16 x i64> @zext_v16i32_v16i64(<16 x i32> %a) {
+; CHECK-SD-LABEL: zext_v16i32_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ushll2 v17.2d, v0.4s, #0
+; CHECK-SD-NEXT: ushll2 v18.2d, v1.4s, #0
+; CHECK-SD-NEXT: ushll v16.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll2 v5.2d, v2.4s, #0
+; CHECK-SD-NEXT: ushll2 v7.2d, v3.4s, #0
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-SD-NEXT: mov v1.16b, v17.16b
+; CHECK-SD-NEXT: mov v2.16b, v16.16b
+; CHECK-SD-NEXT: mov v3.16b, v18.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i32_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d5, v1.d[1]
+; CHECK-GI-NEXT: mov d6, v2.d[1]
+; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: ushll v4.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov d2, v3.d[1]
+; CHECK-GI-NEXT: ushll v17.2d, v1.2s, #0
+; CHECK-GI-NEXT: ushll v18.2d, v5.2s, #0
+; CHECK-GI-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v3.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v2.2s, #0
+; CHECK-GI-NEXT: mov v0.16b, v16.16b
+; CHECK-GI-NEXT: mov v2.16b, v17.16b
+; CHECK-GI-NEXT: mov v3.16b, v18.16b
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i32> %a to <16 x i64>
+  ret <16 x i64> %c
+}
+
+; <16 x i10> argument: the checks show elements 0-7 arriving in w0-w7 and
+; elements 8-15 being loaded from the stack at [sp] .. [sp, #56], then the
+; upper six bits of every 16-bit lane being cleared with bic. This function
+; has a single set of expectations under the unsuffixed check prefix rather
+; than separate -SD / -GI sets.
+define <16 x i16> @zext_v16i10_v16i16(<16 x i10> %a) {
+; CHECK-LABEL: zext_v16i10_v16i16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: ldr w8, [sp]
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: ldr w8, [sp, #8]
+; CHECK-NEXT: mov v0.h[1], w1
+; CHECK-NEXT: mov v1.h[1], w8
+; CHECK-NEXT: ldr w8, [sp, #16]
+; CHECK-NEXT: mov v0.h[2], w2
+; CHECK-NEXT: mov v1.h[2], w8
+; CHECK-NEXT: ldr w8, [sp, #24]
+; CHECK-NEXT: mov v0.h[3], w3
+; CHECK-NEXT: mov v1.h[3], w8
+; CHECK-NEXT: ldr w8, [sp, #32]
+; CHECK-NEXT: mov v0.h[4], w4
+; CHECK-NEXT: mov v1.h[4], w8
+; CHECK-NEXT: ldr w8, [sp, #40]
+; CHECK-NEXT: mov v0.h[5], w5
+; CHECK-NEXT: mov v1.h[5], w8
+; CHECK-NEXT: ldr w8, [sp, #48]
+; CHECK-NEXT: mov v0.h[6], w6
+; CHECK-NEXT: mov v1.h[6], w8
+; CHECK-NEXT: ldr w8, [sp, #56]
+; CHECK-NEXT: mov v0.h[7], w7
+; CHECK-NEXT: mov v1.h[7], w8
+; CHECK-NEXT: bic v0.8h, #252, lsl #8
+; CHECK-NEXT: bic v1.8h, #252, lsl #8
+; CHECK-NEXT: ret
+entry:
+  %c = zext <16 x i10> %a to <16 x i16>
+  ret <16 x i16> %c
+}
+
+; <16 x i10> -> <16 x i32>: the -SD checks assemble four .4h vectors from
+; w0-w7 and the stack, widen them with ushll and mask with
+; "movi v4.4s, #3, msl #8"; the -GI checks insert straight into .s lanes and
+; mask with a constant-pool load, with no ushll at all.
+define <16 x i32> @zext_v16i10_v16i32(<16 x i10> %a) {
+; CHECK-SD-LABEL: zext_v16i10_v16i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: ldr w11, [sp, #32]
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: ldr w13, [sp]
+; CHECK-SD-NEXT: fmov s1, w4
+; CHECK-SD-NEXT: ldr w10, [sp, #40]
+; CHECK-SD-NEXT: ldr w15, [sp, #8]
+; CHECK-SD-NEXT: fmov s3, w11
+; CHECK-SD-NEXT: fmov s2, w13
+; CHECK-SD-NEXT: ldr w9, [sp, #48]
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: ldr w14, [sp, #16]
+; CHECK-SD-NEXT: mov v1.h[1], w5
+; CHECK-SD-NEXT: ldr w8, [sp, #56]
+; CHECK-SD-NEXT: mov v2.h[1], w15
+; CHECK-SD-NEXT: ldr w12, [sp, #24]
+; CHECK-SD-NEXT: mov v3.h[1], w10
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: mov v1.h[2], w6
+; CHECK-SD-NEXT: mov v2.h[2], w14
+; CHECK-SD-NEXT: mov v3.h[2], w9
+; CHECK-SD-NEXT: mov v0.h[3], w3
+; CHECK-SD-NEXT: mov v1.h[3], w7
+; CHECK-SD-NEXT: mov v2.h[3], w12
+; CHECK-SD-NEXT: mov v3.h[3], w8
+; CHECK-SD-NEXT: movi v4.4s, #3, msl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-SD-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-SD-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v4.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v4.16b
+; CHECK-SD-NEXT: and v2.16b, v2.16b, v4.16b
+; CHECK-SD-NEXT: and v3.16b, v3.16b, v4.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i10_v16i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: fmov s16, w0
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: fmov s17, w4
+; CHECK-GI-NEXT: ldr s4, [sp, #32]
+; CHECK-GI-NEXT: adrp x8, .LCPI53_0
+; CHECK-GI-NEXT: ldr s5, [sp, #40]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: ldr s2, [sp, #16]
+; CHECK-GI-NEXT: mov v16.s[1], w1
+; CHECK-GI-NEXT: ldr s6, [sp, #48]
+; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT: ldr s3, [sp, #24]
+; CHECK-GI-NEXT: mov v17.s[1], w5
+; CHECK-GI-NEXT: ldr s7, [sp, #56]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
+; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI53_0]
+; CHECK-GI-NEXT: mov v16.s[2], w2
+; CHECK-GI-NEXT: mov v4.s[2], v6.s[0]
+; CHECK-GI-NEXT: mov v17.s[2], w6
+; CHECK-GI-NEXT: mov v0.s[3], v3.s[0]
+; CHECK-GI-NEXT: mov v16.s[3], w3
+; CHECK-GI-NEXT: mov v4.s[3], v7.s[0]
+; CHECK-GI-NEXT: mov v17.s[3], w7
+; CHECK-GI-NEXT: and v2.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v16.16b, v1.16b
+; CHECK-GI-NEXT: and v3.16b, v4.16b, v1.16b
+; CHECK-GI-NEXT: and v1.16b, v17.16b, v1.16b
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i10> %a to <16 x i32>
+  ret <16 x i32> %c
+}
+
+; <16 x i10> -> <16 x i64>: both check sets build eight .2s pairs from w0-w7
+; and the stack, widen each to .2d with ushll and then mask; the results fill
+; v0-v7. The -SD checks materialise the 0x3ff mask with "mov w8, #1023" plus
+; dup, the -GI checks load the mask operand from the constant pool.
+define <16 x i64> @zext_v16i10_v16i64(<16 x i10> %a) {
+; CHECK-SD-LABEL: zext_v16i10_v16i64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov w8, #1023 // =0x3ff
+; CHECK-SD-NEXT: ldr s4, [sp]
+; CHECK-SD-NEXT: ldr s5, [sp, #16]
+; CHECK-SD-NEXT: add x9, sp, #24
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: fmov s1, w2
+; CHECK-SD-NEXT: dup v7.2d, x8
+; CHECK-SD-NEXT: add x8, sp, #8
+; CHECK-SD-NEXT: fmov s2, w4
+; CHECK-SD-NEXT: fmov s3, w6
+; CHECK-SD-NEXT: ld1 { v5.s }[1], [x9]
+; CHECK-SD-NEXT: add x9, sp, #56
+; CHECK-SD-NEXT: ld1 { v4.s }[1], [x8]
+; CHECK-SD-NEXT: add x8, sp, #40
+; CHECK-SD-NEXT: ldr s6, [sp, #32]
+; CHECK-SD-NEXT: ldr s16, [sp, #48]
+; CHECK-SD-NEXT: mov v0.s[1], w1
+; CHECK-SD-NEXT: mov v1.s[1], w3
+; CHECK-SD-NEXT: ld1 { v6.s }[1], [x8]
+; CHECK-SD-NEXT: mov v2.s[1], w5
+; CHECK-SD-NEXT: ld1 { v16.s }[1], [x9]
+; CHECK-SD-NEXT: mov v3.s[1], w7
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-SD-NEXT: ushll v3.2d, v3.2s, #0
+; CHECK-SD-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-SD-NEXT: ushll v5.2d, v5.2s, #0
+; CHECK-SD-NEXT: ushll v6.2d, v6.2s, #0
+; CHECK-SD-NEXT: ushll v16.2d, v16.2s, #0
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v7.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v7.16b
+; CHECK-SD-NEXT: and v2.16b, v2.16b, v7.16b
+; CHECK-SD-NEXT: and v3.16b, v3.16b, v7.16b
+; CHECK-SD-NEXT: and v4.16b, v4.16b, v7.16b
+; CHECK-SD-NEXT: and v5.16b, v5.16b, v7.16b
+; CHECK-SD-NEXT: and v6.16b, v6.16b, v7.16b
+; CHECK-SD-NEXT: and v7.16b, v16.16b, v7.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: zext_v16i10_v16i64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ldr s0, [sp]
+; CHECK-GI-NEXT: fmov s6, w0
+; CHECK-GI-NEXT: ldr s1, [sp, #8]
+; CHECK-GI-NEXT: fmov s16, w2
+; CHECK-GI-NEXT: ldr s2, [sp, #16]
+; CHECK-GI-NEXT: fmov s18, w4
+; CHECK-GI-NEXT: ldr s3, [sp, #24]
+; CHECK-GI-NEXT: fmov s19, w6
+; CHECK-GI-NEXT: ldr s4, [sp, #32]
+; CHECK-GI-NEXT: adrp x8, .LCPI54_0
+; CHECK-GI-NEXT: ldr s5, [sp, #40]
+; CHECK-GI-NEXT: ldr s7, [sp, #48]
+; CHECK-GI-NEXT: ldr s17, [sp, #56]
+; CHECK-GI-NEXT: mov v6.s[1], w1
+; CHECK-GI-NEXT: mov v16.s[1], w3
+; CHECK-GI-NEXT: mov v18.s[1], w5
+; CHECK-GI-NEXT: mov v19.s[1], w7
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v2.s[1], v3.s[0]
+; CHECK-GI-NEXT: mov v4.s[1], v5.s[0]
+; CHECK-GI-NEXT: mov v7.s[1], v17.s[0]
+; CHECK-GI-NEXT: ldr q17, [x8, :lo12:.LCPI54_0]
+; CHECK-GI-NEXT: ushll v1.2d, v6.2s, #0
+; CHECK-GI-NEXT: ushll v3.2d, v16.2s, #0
+; CHECK-GI-NEXT: ushll v5.2d, v18.2s, #0
+; CHECK-GI-NEXT: ushll v6.2d, v19.2s, #0
+; CHECK-GI-NEXT: ushll v16.2d, v0.2s, #0
+; CHECK-GI-NEXT: ushll v18.2d, v2.2s, #0
+; CHECK-GI-NEXT: ushll v19.2d, v4.2s, #0
+; CHECK-GI-NEXT: ushll v7.2d, v7.2s, #0
+; CHECK-GI-NEXT: and v0.16b, v1.16b, v17.16b
+; CHECK-GI-NEXT: and v1.16b, v3.16b, v17.16b
+; CHECK-GI-NEXT: and v2.16b, v5.16b, v17.16b
+; CHECK-GI-NEXT: and v3.16b, v6.16b, v17.16b
+; CHECK-GI-NEXT: and v4.16b, v16.16b, v17.16b
+; CHECK-GI-NEXT: and v5.16b, v18.16b, v17.16b
+; CHECK-GI-NEXT: and v6.16b, v19.16b, v17.16b
+; CHECK-GI-NEXT: and v7.16b, v7.16b, v17.16b
+; CHECK-GI-NEXT: ret
+entry:
+  %c = zext <16 x i10> %a to <16 x i64>
+  ret <16 x i64> %c
+}