Index: llvm/include/llvm/Support/TypeSize.h =================================================================== --- llvm/include/llvm/Support/TypeSize.h +++ llvm/include/llvm/Support/TypeSize.h @@ -296,6 +296,8 @@ ScalarTy getKnownMinValue() const { return this->Value; } /// Returns whether the size is scaled by a runtime quantity (vscale). bool isScalable() const { return this->UnivariateDim == ScalableDim; } + /// Returns true if the size is known at compile time. + bool isFixed() const { return this->UnivariateDim == FixedDim; } /// A return value of true indicates we know at compile time that the number /// of elements (vscale * Min) is definitely even. However, returning false /// does not guarantee that the total number of elements is odd. Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8986,13 +8986,12 @@ if (V.isUndef()) continue; else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(V.getOperand(1)) || - V.getOperand(0).getValueType().isScalableVector()) { + !isa<ConstantSDNode>(V.getOperand(1))) { LLVM_DEBUG( dbgs() << "Reshuffle failed: " "a shuffle can only come from building a vector from " - "various elements of other fixed-width vectors, provided " - "their indices are constant\n"); + "various elements of other vectors, provided their " + "indices are constant\n"); return SDValue(); } @@ -9046,7 +9045,7 @@ unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits(); EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts); - if (SrcVTSize.getFixedValue() < VTSize) { + if (SrcVTSize.isFixed() && SrcVTSize.getFixedValue() < VTSize) { assert(2 * SrcVTSize == VTSize); // We can pad out the smaller vector for free, so if it's part of a // shuffle...
@@ -9056,7 +9055,8 @@ continue; } - if (SrcVTSize.getFixedValue() != 2 * VTSize) { + if ((SrcVTSize.isFixed() && SrcVTSize.getFixedValue() != 2 * VTSize) || + (SrcVTSize.isScalable() && SrcVTSize.getKnownMinValue() != VTSize)) { LLVM_DEBUG( dbgs() << "Reshuffle failed: result vector too small to extract\n"); return SDValue(); @@ -9117,10 +9117,11 @@ Src.WindowBase *= Src.WindowScale; } - // Final check before we try to actually produce a shuffle. LLVM_DEBUG(for (auto Src - : Sources) - assert(Src.ShuffleVec.getValueType() == ShuffleVT);); + : Sources) assert(Src.ShuffleVec.getValueType() + .getSizeInBits() + .getKnownMinValue() == + ShuffleVT.getFixedSizeInBits());); // The stars all align, our next step is to produce the mask for the shuffle. SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1); Index: llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll =================================================================== --- llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll +++ llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll @@ -39,6 +39,7 @@ } +; ReconstructShuffle - fails define <4 x i1> @reshuffle_v4i1_nxv4i1(<vscale x 4 x i1> %a) #0 { ; CHECK-LABEL: reshuffle_v4i1_nxv4i1: ; CHECK: // %bb.0: @@ -64,6 +65,7 @@ } +; ReconstructShuffle - fails define <4 x i1> @reshuffle_v4i1_nxv8i1(<vscale x 8 x i1> %a) #0 { ; CHECK-LABEL: reshuffle_v4i1_nxv8i1: ; CHECK: // %bb.0: @@ -89,6 +91,7 @@ } +; ReconstructShuffle - fails define <8 x i1> @reshuffle_v8i1_nxv8i1(<vscale x 8 x i1> %a) #0 { ; CHECK-LABEL: reshuffle_v8i1_nxv8i1: ; CHECK: // %bb.0: @@ -130,6 +133,7 @@ } +; ReconstructShuffle - fails define <16 x i1> @reshuffle_v16i1_nxv16i1(<vscale x 16 x i1> %a) #0 { ; CHECK-LABEL: reshuffle_v16i1_nxv16i1: ; CHECK: // %bb.0: @@ -424,6 +428,7 @@ ; == Reversed first N elements == +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_nxv4i16_reverse(<vscale x 4 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v4i16_nxv4i16_reverse: ; CHECK: // %bb.0: @@ -449,6 +454,7 @@ } +; ReconstructShuffle - fails define <8 x i16> @reshuffle_v8i16_nxv8i16_reverse(
<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_reverse: ; CHECK: // %bb.0: @@ -538,11 +544,10 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_undef(<vscale x 4 x i32> %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement <vscale x 4 x i32> %a, i32 0 @@ -553,13 +558,11 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_undef2(<vscale x 4 x i32> %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_undef2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov w9, v0.s[3] -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: mov v0.s[3], w9 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el2 = extractelement <vscale x 4 x i32> %a, i32 2 %el3 = extractelement <vscale x 4 x i32> %a, i32 3 @@ -592,6 +595,7 @@ } +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_nxv8i16_undef(<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v4i16_nxv8i16_undef: ; CHECK: // %bb.0: @@ -606,6 +610,7 @@ ret <4 x i16> %v1 } +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_nxv8i16_undef2(<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v4i16_nxv8i16_undef2: ; CHECK: // %bb.0: @@ -623,15 +628,10 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_nxv8i16_undef(<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: umov w10, v0.h[3] -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: mov v0.h[3], w10 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement <vscale x 8 x i16> %a, i32 0 @@ -646,18 +646,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_nxv8i16_undef2(<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_undef2: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[4] -; CHECK-NEXT: umov w9, v0.h[5] -; CHECK-NEXT: mov v1.h[4], w8 -; CHECK-NEXT:
umov w8, v0.h[6] -; CHECK-NEXT: mov v1.h[5], w9 -; CHECK-NEXT: umov w9, v0.h[7] -; CHECK-NEXT: mov v1.h[6], w8 -; CHECK-NEXT: mov v1.h[7], w9 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el4 = extractelement <vscale x 8 x i16> %a, i32 4 %el5 = extractelement <vscale x 8 x i16> %a, i32 5 @@ -671,13 +664,11 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_reverse_undef(<vscale x 4 x i32> %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_reverse_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: rev64 v0.4s, v0.4s ; CHECK-NEXT: ret %el0 = extractelement <vscale x 4 x i32> %a, i32 0 %el1 = extractelement <vscale x 4 x i32> %a, i32 1 @@ -687,13 +678,12 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_nxv4i32_reverse_undef2(<vscale x 4 x i32> %a) #0 { ; CHECK-LABEL: reshuffle_v4i32_nxv4i32_reverse_undef2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %el2 = extractelement <vscale x 4 x i32> %a, i32 2 %el3 = extractelement <vscale x 4 x i32> %a, i32 3 @@ -703,18 +693,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_nxv8i16_reverse_undef(<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_reverse_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.h[1], w9 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: rev64 v0.8h, v0.8h ; CHECK-NEXT: ret %el0 = extractelement <vscale x 8 x i16> %a, i32 0 %el1 = extractelement <vscale x 8 x i16> %a, i32 1 @@ -728,6 +711,7 @@ } +; ReconstructShuffle - fails define <8 x i16> @reshuffle_v8i16_nxv8i16_reverse_undef2(<vscale x 8 x i16> %a) #0 { ; CHECK-LABEL: reshuffle_v8i16_nxv8i16_reverse_undef2: ;
CHECK: // %bb.0: @@ -785,15 +769,11 @@ } +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_2x_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: reshuffle_v4i32_2x_nxv4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: mov v0.s[3], w9 +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement <vscale x 4 x i32> %a, i32 0 @@ -808,6 +788,7 @@ } +; ReconstructShuffle - fails define <4 x i16> @reshuffle_v4i16_2x_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: reshuffle_v4i16_2x_nxv8i16: ; CHECK: // %bb.0: @@ -831,23 +812,11 @@ } +; ReconstructShuffle - succeeds define <8 x i16> @reshuffle_v8i16_2x_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: reshuffle_v8i16_2x_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: umov w10, v0.h[3] -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: mov v0.h[4], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.h[5], w9 -; CHECK-NEXT: umov w9, v1.h[3] -; CHECK-NEXT: mov v0.h[6], w8 -; CHECK-NEXT: mov v0.h[7], w9 +; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %el0 = extractelement <vscale x 8 x i16> %a, i32 0 @@ -873,12 +842,11 @@ ; == Shuffle comes from two input sources and result requires padding with undef == +; ReconstructShuffle - succeeds define <4 x i32> @reshuffle_v4i32_2x_nxv4i32_undef1(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: reshuffle_v4i32_2x_nxv4i32_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %el0 = extractelement <vscale x 4 x i32> %a, i32 0 %el1 = extractelement <vscale x 4 x i32> %b, i32 0 @@ -888,12 +856,11 @@ } +; ReconstructShuffle -
succeeds define <8 x i16> @reshuffle_v8i16_2x_nxv8i16_undef1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: reshuffle_v8i16_2x_nxv8i16_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: zip1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %el0 = extractelement <vscale x 8 x i16> %a, i32 0 %el1 = extractelement <vscale x 8 x i16> %b, i32 0 @@ -903,6 +870,7 @@ } +; ReconstructShuffle - fails define <8 x i16> @reshuffle_v8i16_2x_nxv8i16_undef2(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 { ; CHECK-LABEL: reshuffle_v8i16_2x_nxv8i16_undef2: ; CHECK: // %bb.0: