diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8986,12 +8986,13 @@
     if (V.isUndef())
       continue;
     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
-             !isa<ConstantSDNode>(V.getOperand(1))) {
+             !isa<ConstantSDNode>(V.getOperand(1)) ||
+             V.getOperand(0).getValueType().isScalableVector()) {
       LLVM_DEBUG(
           dbgs() << "Reshuffle failed: "
                     "a shuffle can only come from building a vector from "
-                    "various elements of other vectors, provided their "
-                    "indices are constant\n");
+                    "various elements of other fixed-width vectors, provided "
+                    "their indices are constant\n");
       return SDValue();
     }
@@ -9035,8 +9036,8 @@
   for (auto &Src : Sources) {
     EVT SrcVT = Src.ShuffleVec.getValueType();
-    uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
-    if (SrcVTSize == VTSize)
+    TypeSize SrcVTSize = SrcVT.getSizeInBits();
+    if (SrcVTSize == TypeSize::Fixed(VTSize))
       continue;
 
     // This stage of the search produces a source with the same element type as
@@ -9045,7 +9046,7 @@
     unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
     EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
 
-    if (SrcVTSize < VTSize) {
+    if (SrcVTSize.getFixedValue() < VTSize) {
       assert(2 * SrcVTSize == VTSize);
       // We can pad out the smaller vector for free, so if it's part of a
       // shuffle...
@@ -9055,7 +9056,7 @@
       continue;
     }
 
-    if (SrcVTSize != 2 * VTSize) {
+    if (SrcVTSize.getFixedValue() != 2 * VTSize) {
       LLVM_DEBUG(
           dbgs() << "Reshuffle failed: result vector too small to extract\n");
       return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -361,6 +361,106 @@
   ret <16 x i8> %retval
 }
 
+
+; Predicates
+
+define <2 x i1> @extract_v2i1_nxv2i1(<vscale x 2 x i1> %inmask) {
+; CHECK-LABEL: extract_v2i1_nxv2i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    fmov s0, w0
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %mask = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %inmask, i64 0)
+  ret <2 x i1> %mask
+}
+
+define <4 x i1> @extract_v4i1_nxv4i1(<vscale x 4 x i1> %inmask) {
+; CHECK-LABEL: extract_v4i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov w8, v1.s[1]
+; CHECK-NEXT:    mov w9, v1.s[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.h[1], w8
+; CHECK-NEXT:    mov w8, v1.s[3]
+; CHECK-NEXT:    mov v0.h[2], w9
+; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %mask = call <4 x i1> @llvm.experimental.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1> %inmask, i64 0)
+  ret <4 x i1> %mask
+}
+
+define <8 x i1> @extract_v8i1_nxv8i1(<vscale x 8 x i1> %inmask) {
+; CHECK-LABEL: extract_v8i1_nxv8i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.h, p0/z, #1 // =0x1
+; CHECK-NEXT:    umov w8, v1.h[1]
+; CHECK-NEXT:    umov w9, v1.h[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.b[1], w8
+; CHECK-NEXT:    umov w8, v1.h[3]
+; CHECK-NEXT:    mov v0.b[2], w9
+; CHECK-NEXT:    umov w9, v1.h[4]
+; CHECK-NEXT:    mov v0.b[3], w8
+; CHECK-NEXT:    umov w8, v1.h[5]
+; CHECK-NEXT:    mov v0.b[4], w9
+; CHECK-NEXT:    umov w9, v1.h[6]
+; CHECK-NEXT:    mov v0.b[5], w8
+; CHECK-NEXT:    umov w8, v1.h[7]
+; CHECK-NEXT:    mov v0.b[6], w9
+; CHECK-NEXT:    mov v0.b[7], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %mask = call <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> %inmask, i64 0)
+  ret <8 x i1> %mask
+}
+
+define <16 x i1> @extract_v16i1_nxv16i1(<vscale x 16 x i1> %inmask) {
+; CHECK-LABEL: extract_v16i1_nxv16i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    umov w8, v1.b[1]
+; CHECK-NEXT:    umov w9, v1.b[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.b[1], w8
+; CHECK-NEXT:    umov w8, v1.b[3]
+; CHECK-NEXT:    mov v0.b[2], w9
+; CHECK-NEXT:    umov w9, v1.b[4]
+; CHECK-NEXT:    mov v0.b[3], w8
+; CHECK-NEXT:    umov w8, v1.b[5]
+; CHECK-NEXT:    mov v0.b[4], w9
+; CHECK-NEXT:    umov w9, v1.b[6]
+; CHECK-NEXT:    mov v0.b[5], w8
+; CHECK-NEXT:    umov w8, v1.b[7]
+; CHECK-NEXT:    mov v0.b[6], w9
+; CHECK-NEXT:    umov w9, v1.b[8]
+; CHECK-NEXT:    mov v0.b[7], w8
+; CHECK-NEXT:    umov w8, v1.b[9]
+; CHECK-NEXT:    mov v0.b[8], w9
+; CHECK-NEXT:    umov w9, v1.b[10]
+; CHECK-NEXT:    mov v0.b[9], w8
+; CHECK-NEXT:    umov w8, v1.b[11]
+; CHECK-NEXT:    mov v0.b[10], w9
+; CHECK-NEXT:    umov w9, v1.b[12]
+; CHECK-NEXT:    mov v0.b[11], w8
+; CHECK-NEXT:    umov w8, v1.b[13]
+; CHECK-NEXT:    mov v0.b[12], w9
+; CHECK-NEXT:    umov w9, v1.b[14]
+; CHECK-NEXT:    mov v0.b[13], w8
+; CHECK-NEXT:    umov w8, v1.b[15]
+; CHECK-NEXT:    mov v0.b[14], w9
+; CHECK-NEXT:    mov v0.b[15], w8
+; CHECK-NEXT:    ret
+  %mask = call <16 x i1> @llvm.experimental.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1> %inmask, i64 0)
+  ret <16 x i1> %mask
+}
+
+
 ; Fixed length clamping
 
 define <2 x i64> @extract_fixed_v2i64_nxv2i64(<vscale x 2 x i64> %vec) nounwind #0 {
@@ -479,6 +579,11 @@
 declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv4i8(<vscale x 4 x i8>, i64)
 declare <16 x i8> @llvm.experimental.vector.extract.v16i8.nxv2i8(<vscale x 2 x i8>, i64)
 
+declare <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1>, i64)
+declare <4 x i1> @llvm.experimental.vector.extract.v4i1.nxv4i1(<vscale x 4 x i1>, i64)
+declare <8 x i1> @llvm.experimental.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1>, i64)
+declare <16 x i1> @llvm.experimental.vector.extract.v16i1.nxv16i1(<vscale x 16 x i1>, i64)
+
 declare <4 x i64> @llvm.experimental.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64>, i64)
 declare <2 x float> @llvm.experimental.vector.extract.v2f32.nxv4f32(<vscale x 4 x float>, i64)
 declare <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv8i32(<vscale x 8 x i32>, i64)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; == Matching first N elements ==
+
+define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 {
+; CHECK-LABEL: reshuffle_v4i1_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z1.s, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov w8, v1.s[1]
+; CHECK-NEXT:    mov w9, v1.s[2]
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    mov v0.h[1], w8
+; CHECK-NEXT:    mov w8, v1.s[3]
+; CHECK-NEXT:    mov v0.h[2], w9
+; CHECK-NEXT:    mov v0.h[3], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
+  %el0 = extractelement <vscale x 4 x i1> %a, i32 0
+  %el1 = extractelement <vscale x 4 x i1> %a, i32 1
+  %el2 = extractelement <vscale x 4 x i1> %a, i32 2
+  %el3 = extractelement <vscale x 4 x i1> %a, i32 3
+  %v0 = insertelement <4 x i1> undef, i1 %el0, i32 0
+  %v1 = insertelement <4 x i1> %v0, i1 %el1, i32 1
+  %v2 = insertelement <4 x i1> %v1, i1 %el2, i32 2
+  %v3 = insertelement <4 x i1> %v2, i1 %el3, i32 3
+  ret <4 x i1> %v3
+}
+
+attributes #0 = { "target-features"="+sve" }