Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9039,19 +9039,13 @@ if (SrcVTSize == TypeSize::Fixed(VTSize)) continue; - if (SrcVTSize.isScalable()) { - LLVM_DEBUG( - dbgs() << "Reshuffle failed: scalable vectors not yet supported\n"); - return SDValue(); - } - // This stage of the search produces a source with the same element type as // the original, but with a total width matching the BUILD_VECTOR output. EVT EltVT = SrcVT.getVectorElementType(); unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); - if (SrcVTSize.getFixedValue() < VTSize) { + if (!SrcVTSize.isScalable() && SrcVTSize.getFixedValue() < VTSize) { assert(2 * SrcVTSize == VTSize); // We can pad out the smaller vector for free, so if it's part of a // shuffle... @@ -9061,7 +9055,8 @@ continue; } - if (SrcVTSize.getFixedValue() != 2 * VTSize) { + if ((!SrcVTSize.isScalable() && SrcVTSize.getFixedValue() != 2 * VTSize) || + (SrcVTSize.isScalable() && SrcVTSize.getKnownMinValue() != VTSize)) { LLVM_DEBUG( dbgs() << "Reshuffle failed: result vector too small to extract\n"); return SDValue(); @@ -9122,10 +9117,11 @@ Src.WindowBase *= Src.WindowScale; } - // Final check before we try to actually produce a shuffle. LLVM_DEBUG(for (auto Src - : Sources) - assert(Src.ShuffleVec.getValueType() == ShuffleVT);); + : Sources) assert(Src.ShuffleVec.getValueType() + .getSizeInBits() + .getKnownMinValue() == + ShuffleVT.getFixedSizeInBits());); // The stars all align, our next step is to produce the mask for the shuffle. SmallVector Mask(ShuffleVT.getVectorNumElements(), -1); Index: llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll @@ -39,6 +39,7 @@ } +; ReconstructShuffle fails define <4 x i1> @reshuffle_v4i1_nxv4i1( %a) #0 { ; CHECK-LABEL: reshuffle_v4i1_nxv4i1: ; CHECK: // %bb.0: @@ -64,6 +65,7 @@ } +; ReconstructShuffle fails define <4 x i1> @reshuffle_v4i1_nxv8i1( %a) #0 { ; CHECK-LABEL: reshuffle_v4i1_nxv8i1: ; CHECK: // %bb.0: @@ -89,6 +91,7 @@ } +; ReconstructShuffle fails define <8 x i1> @reshuffle_v8i1_nxv8i1( %a) #0 { ; CHECK-LABEL: reshuffle_v8i1_nxv8i1: ; CHECK: // %bb.0: @@ -130,6 +133,7 @@ } +; ReconstructShuffle fails define <16 x i1> @reshuffle_v16i1_nxv16i1( %a) #0 { ; CHECK-LABEL: reshuffle_v16i1_nxv16i1: ; CHECK: // %bb.0: @@ -424,6 +428,7 @@ ; == Reversed first N elements == +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_nxv4i16_reverse( %a) #0 { ; CHECK-LABEL: reshuffle_v4i16_nxv4i16_reverse: ; CHECK: // %bb.0: @@ -449,6 +454,7 @@ } +; ReconstructShuffle - fails define <8 x i16> @reshuffle_v8i16_nxv8i16_reverse( %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_reverse: ; CHECK: // %bb.0: @@ -538,11 +544,10 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_undef( %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 @@ -553,13 +558,11 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_undef2( %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_undef2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov w9, v0.s[3] -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: mov v0.s[3], w9 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el2 = extractelement %a, i32 2 %el3 = extractelement %a, i32 3 @@ -592,6 +595,7 @@ } +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_nxv8i16_undef( %a) #0 { ; CHECK-LABEL: reshuffle_v4i16_nxv8i16_undef: ; CHECK: // %bb.0: @@ -606,6 +610,7 @@ ret <4 x i16> %v1 } +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_nxv8i16_undef2( %a) #0 { ; CHECK-LABEL: reshuffle_v4i16_nxv8i16_undef2: ; CHECK: // %bb.0: @@ -623,15 +628,10 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_nxv8i16_undef( %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: umov w10, v0.h[3] -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: mov v0.h[3], w10 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 @@ -646,18 +646,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_nxv8i16_undef2( %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_undef2: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[4] -; CHECK-NEXT: umov w9, v0.h[5] -; CHECK-NEXT: mov v1.h[4], w8 -; CHECK-NEXT: umov w8, v0.h[6] -; CHECK-NEXT: mov v1.h[5], w9 -; CHECK-NEXT: umov w9, v0.h[7] -; CHECK-NEXT: mov v1.h[6], w8 -; CHECK-NEXT: mov v1.h[7], w9 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el4 = extractelement %a, i32 4 %el5 = extractelement %a, i32 5 @@ -671,13 +664,11 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_reverse_undef( %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_reverse_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: rev64 v0.4s, v0.4s ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 %el1 = extractelement %a, i32 1 @@ -687,13 +678,12 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_reverse_undef2( %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_reverse_undef2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %el2 = extractelement %a, i32 2 %el3 = extractelement %a, i32 3 @@ -703,18 +693,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_nxv8i16_reverse_undef( %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_reverse_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.h[1], w9 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: rev64 v0.8h, v0.8h ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 %el1 = extractelement %a, i32 1 @@ -728,6 +711,7 @@ } +; ReconstructShuffle - fails define <8 x i16> @reshuffle_v8i16_nxv8i16_reverse_undef2( %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_reverse_undef2: ; CHECK: // %bb.0: @@ -785,15 +769,11 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_2x_nxv4i32( %a, %b) #0 { ; CHECK-LABEL: reshuffle_v4i32_2x_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: mov v0.s[3], w9 +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 @@ -808,6 +788,7 @@ } +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_2x_nxv8i16( %a, %b) #0 { ; CHECK-LABEL: reshuffle_v4i16_2x_nxv8i16: ; CHECK: // %bb.0: @@ -831,23 +812,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_2x_nxv8i16( %a, %b) #0 { ; CHECK-LABEL: reshuffle_v8i16_2x_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: umov w10, v0.h[3] -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: mov v0.h[4], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.h[5], w9 -; CHECK-NEXT: umov w9, v1.h[3] -; CHECK-NEXT: mov v0.h[6], w8 -; CHECK-NEXT: mov v0.h[7], w9 +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 @@ -873,12 +842,11 @@ ; == Shuffle comes from two input sources and result requires padding with undef == +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_2x_nxv4i32_undef1( %a, %b) #0 { ; CHECK-LABEL: reshuffle_v4i32_2x_nxv4i32_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 %el1 = extractelement %b, i32 0 @@ -888,12 +856,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_2x_nxv8i16_undef1( %a, %b) #0 { ; CHECK-LABEL: reshuffle_v8i16_2x_nxv8i16_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: zip1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %el0 = extractelement %a, i32 0 %el1 = extractelement %b, i32 0 @@ -903,6 +870,7 @@ } +; ReconstructShuffle - fails define <8 x i16> @reshuffle_v8i16_2x_nxv8i16_undef2( %a, %b) #0 { ; CHECK-LABEL: reshuffle_v8i16_2x_nxv8i16_undef2: ; CHECK: // %bb.0: