diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2456,35 +2456,6 @@
     return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
 }
 
-// Is the mask a slidedown that shifts in undefs.
-static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
-  int Size = Mask.size();
-
-  // Elements shifted in should be undef.
-  auto CheckUndefs = [&](int Shift) {
-    for (int i = Size - Shift; i != Size; ++i)
-      if (Mask[i] >= 0)
-        return false;
-    return true;
-  };
-
-  // Elements should be shifted or undef.
-  auto MatchShift = [&](int Shift) {
-    for (int i = 0; i != Size - Shift; ++i)
-      if (Mask[i] >= 0 && Mask[i] != Shift + i)
-        return false;
-    return true;
-  };
-
-  // Try all possible shifts.
-  for (int Shift = 1; Shift != Size; ++Shift)
-    if (CheckUndefs(Shift) && MatchShift(Shift))
-      return Shift;
-
-  // No match.
-  return -1;
-}
-
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
                                 const RISCVSubtarget &Subtarget) {
   // We need to be able to widen elements to the next larger integer type.
@@ -2527,7 +2498,19 @@
   return true;
 }
 
-static int isElementRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
+/// Match shuffles that concatenate two vectors, rotate the concatenation,
+/// and then extract the original number of elements from the rotated result.
+/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
+/// returned rotation amount is for a rotate right, where elements move from
+/// higher elements to lower elements. \p LoSrc indicates the first source
+/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
+/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will
+/// be 0 or 1 if a rotation is found.
+///
+/// NOTE: We talk about rotate to the right, which matches how bit shift and
+/// rotate instructions are described where LSBs are on the right, but LLVM IR
+/// and the table below write vectors with the lowest elements on the left.
+static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
   int Size = Mask.size();
 
   // We need to detect various ways of spelling a rotation:
@@ -2538,7 +2521,8 @@
   //   [-1,  4,  5,  6, -1, -1,  9, -1]
   //   [-1,  4,  5,  6, -1, -1, -1, -1]
   int Rotation = 0;
-  SDValue Lo, Hi;
+  LoSrc = -1;
+  HiSrc = -1;
   for (int i = 0; i != Size; ++i) {
     int M = Mask[i];
     if (M < 0)
@@ -2562,18 +2546,18 @@
       return -1;
 
     // Compute which value this mask is pointing at.
-    SDValue MaskV = M < Size ? V1 : V2;
+    int MaskSrc = M < Size ? 0 : 1;
 
     // Compute which of the two target values this index should be assigned to.
     // This reflects whether the high elements are remaining or the low
    // elements are remaining.
-    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
 
     // Either set up this value if we've not encountered it before, or check
     // that it remains consistent.
-    if (!TargetV)
-      TargetV = MaskV;
-    else if (TargetV != MaskV)
+    if (TargetSrc < 0)
+      TargetSrc = MaskSrc;
+    else if (TargetSrc != MaskSrc)
       // This may be a rotation, but it pulls from the inputs in some
       // unsupported interleaving.
       return -1;
@@ -2581,14 +2565,8 @@
 
   // Check that we successfully analyzed the mask, and normalize the results.
   assert(Rotation != 0 && "Failed to locate a viable rotation!");
-  assert((Lo || Hi) && "Failed to find a rotated input vector!");
-
-  // Make sure we've found a value for both halves.
-  if (!Lo || !Hi)
-    return -1;
-
-  V1 = Lo;
-  V2 = Hi;
+  assert((LoSrc >= 0 || HiSrc >= 0) &&
+         "Failed to find a rotated input vector!");
 
   return Rotation;
 }
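To see what the new isElementRotate computes, here is a minimal standalone C++ sketch of the matching loop, usable outside of SelectionDAG. It is an assumption-level model, not the LLVM function itself: matchRotate is an illustrative name, it takes a plain std::vector<int> instead of ArrayRef<int>, and undef lanes are encoded as -1 as in shuffle masks.

#include <cstdio>
#include <vector>

// Sketch of isElementRotate's matching loop. Mask values in [0, Size)
// select the first source, [Size, 2*Size) the second, -1 is undef.
static int matchRotate(int &LoSrc, int &HiSrc, const std::vector<int> &Mask) {
  int Size = (int)Mask.size();
  int Rotation = 0;
  LoSrc = -1;
  HiSrc = -1;
  for (int i = 0; i != Size; ++i) {
    int M = Mask[i];
    if (M < 0)
      continue;
    // Where would a rotated source have started to put element M at i?
    int StartIdx = i - (M % Size);
    if (StartIdx == 0)
      return -1; // The identity rotation is useless; don't match it.
    // StartIdx < 0: this source is the high half of the concatenation and
    // fills the low result positions after rotating right by -StartIdx.
    // StartIdx > 0: it is the low half, implying Size - StartIdx.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      return -1; // Inconsistent rotation amounts.
    int MaskSrc = M < Size ? 0 : 1;
    int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
    if (TargetSrc < 0)
      TargetSrc = MaskSrc;
    else if (TargetSrc != MaskSrc)
      return -1; // Pulls from the sources in an unsupported interleaving.
  }
  return Rotation;
}

int main() {
  int Lo, Hi;
  // Two-source rotation from the comment table: the low result elements
  // come from source 1, the high result elements from source 0.
  int R = matchRotate(Lo, Hi, {11, 12, 13, 14, 15, 0, 1, 2});
  std::printf("rotation=%d LoSrc=%d HiSrc=%d\n", R, Lo, Hi); // 3, 0, 1
  // Single-source slideup mask, as in the new tests: HiSrc stays -1.
  R = matchRotate(Lo, Hi, {-1, 0, 1, 2});
  std::printf("rotation=%d LoSrc=%d HiSrc=%d\n", R, Lo, Hi); // 3, 0, -1
}

The second result is the case the old matchShuffleAsSlideDown could not see: a slideup is the single-source rotation with HiSrc == -1, which the new lowering handles with a lone VSLIDEUP.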
@@ -2685,45 +2663,43 @@
 
   ArrayRef<int> Mask = SVN->getMask();
 
-  // Try to match as a slidedown.
-  int SlideAmt = matchShuffleAsSlideDown(Mask);
-  if (SlideAmt >= 0) {
-    // TODO: Should we reduce the VL to account for the upper undef elements?
-    // Requires additional vsetvlis, but might be faster to execute.
-    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
-    SDValue SlideDown =
-        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
-                    DAG.getUNDEF(ContainerVT), V1,
-                    DAG.getConstant(SlideAmt, DL, XLenVT),
-                    TrueMask, VL);
-    return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
-  }
-
-  // Match shuffles that concatenate two vectors, rotate the concatenation,
-  // and then extract the original number of elements from the rotated result.
-  // This is equivalent to vector.splice or X86's PALIGNR instruction. Lower
-  // it to a SLIDEDOWN and a SLIDEUP.
-  // FIXME: We don't really need it to be a concatenation. We just need two
-  // regions with contiguous elements that need to be shifted down and up.
-  int Rotation = isElementRotate(V1, V2, Mask);
+  // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors
+  // may be undef which can be handled with a single SLIDEDOWN/UP.
+  int LoSrc, HiSrc;
+  int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
   if (Rotation > 0) {
-    // We found a rotation. We need to slide V1 down by Rotation. Using
-    // (NumElts - Rotation) for VL. Then we need to slide V2 up by
-    // (NumElts - Rotation) using NumElts for VL.
-    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
-    V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+    SDValue LoV, HiV;
+    if (LoSrc >= 0) {
+      LoV = LoSrc == 0 ? V1 : V2;
+      LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
+    }
+    if (HiSrc >= 0) {
+      HiV = HiSrc == 0 ? V1 : V2;
+      HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
+    }
+
+    // We found a rotation. We need to slide HiV down by Rotation. Then we
+    // need to slide LoV up by (NumElts - Rotation).
     unsigned InvRotate = NumElts - Rotation;
-    SDValue SlideDown =
-        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
-                    DAG.getUNDEF(ContainerVT), V2,
-                    DAG.getConstant(Rotation, DL, XLenVT),
-                    TrueMask, DAG.getConstant(InvRotate, DL, XLenVT));
-    SDValue SlideUp =
-        DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, SlideDown, V1,
-                    DAG.getConstant(InvRotate, DL, XLenVT),
-                    TrueMask, VL);
-    return convertFromScalableVector(VT, SlideUp, DAG, Subtarget);
+
+    SDValue Res = DAG.getUNDEF(ContainerVT);
+    if (HiV) {
+      // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
+      // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
+      // causes multiple vsetvlis in some test cases such as lowering
+      // reduce.mul
+      SDValue DownVL = VL;
+      if (LoV)
+        DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
+      Res =
+          DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
+                      DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
+    }
+    if (LoV)
+      Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
+                        DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
+
+    return convertFromScalableVector(VT, Res, DAG, Subtarget);
   }
 
   // Detect an interleave shuffle and lower to
@@ -2947,7 +2923,8 @@
   MVT SVT = VT.getSimpleVT();
 
   bool SwapSources;
-  return (matchShuffleAsSlideDown(M) >= 0) ||
+  int LoSrc, HiSrc;
+  return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
          isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
 }
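The slide arithmetic in the new lowering can be checked with a plain-array model: slide HiV down by Rotation with VL = InvRotate, then slide LoV up by InvRotate with VL = NumElts, where InvRotate = NumElts - Rotation. The sketch below only models the two-slide case where both sources are present; simulateRotateLowering is an illustrative name, not an LLVM or RVV API.

#include <cstdio>
#include <vector>

static std::vector<int> simulateRotateLowering(const std::vector<int> &LoV,
                                               const std::vector<int> &HiV,
                                               int Rotation) {
  int NumElts = (int)LoV.size();
  int InvRotate = NumElts - Rotation;
  std::vector<int> Res(NumElts, -1); // -1 models undef lanes.
  // vslidedown with offset Rotation and VL = InvRotate: result lane i
  // reads lane i + Rotation of HiV, for the first InvRotate lanes only.
  for (int i = 0; i < InvRotate; ++i)
    Res[i] = HiV[i + Rotation];
  // vslideup with offset InvRotate and VL = NumElts: lanes below the
  // offset keep their old value (the merge operand in the DAG node),
  // the remaining lanes read LoV from its first element.
  for (int i = InvRotate; i < NumElts; ++i)
    Res[i] = LoV[i - InvRotate];
  return Res;
}

int main() {
  // Rotation = 3 over v1 = [0..7] (LoSrc) and v2 = [8..15] (HiSrc)
  // reproduces the comment table's mask [11, 12, 13, 14, 15, 0, 1, 2].
  std::vector<int> V1 = {0, 1, 2, 3, 4, 5, 6, 7};
  std::vector<int> V2 = {8, 9, 10, 11, 12, 13, 14, 15};
  for (int E : simulateRotateLowering(V1, V2, 3))
    std::printf("%d ", E); // prints: 11 12 13 14 15 0 1 2
  std::printf("\n");
}

Reducing the SLIDEDOWN's VL to InvRotate is safe precisely because the SLIDEUP then overwrites every lane from InvRotate upward.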
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -256,6 +256,28 @@
   ret <8 x float> %s
 }
 
+define <4 x half> @slideup_v4f16(<4 x half> %x) {
+; CHECK-LABEL: slideup_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
+  ret <4 x half> %s
+}
+
+define <8 x float> @slideup_v8f32(<8 x float> %x) {
+; CHECK-LABEL: slideup_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT:    vslideup.vi v10, v8, 3
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x float> %s
+}
+
 define <8 x float> @splice_unary(<8 x float> %x) {
 ; CHECK-LABEL: splice_unary:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -555,6 +555,28 @@
   ret <8 x i32> %s
 }
 
+define <4 x i16> @slideup_v4i16(<4 x i16> %x) {
+; CHECK-LABEL: slideup_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 undef, i32 0, i32 1, i32 2>
+  ret <4 x i16> %s
+}
+
+define <8 x i32> @slideup_v8i32(<8 x i32> %x) {
+; CHECK-LABEL: slideup_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT:    vslideup.vi v10, v8, 3
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+  %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4>
+  ret <8 x i32> %s
+}
+
 define <8 x i16> @splice_unary(<8 x i16> %x) {
 ; CHECK-LABEL: splice_unary:
 ; CHECK:       # %bb.0:
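The shuffle masks in the new tests follow directly from the checked vslideup.vi immediates: a slideup by N over NumElts lanes is the single-source rotation with Rotation = NumElts - N, so only the SLIDEUP branch of the lowering fires and the slide amount is InvRotate = N. A small sketch of that relationship; makeSlideupMask is an illustrative helper, not part of the patch.

#include <cstdio>
#include <vector>

// Build the shuffle mask that a vslideup by SlideAmt produces: the low
// SlideAmt lanes are undef (-1), the rest read the source from element 0.
static std::vector<int> makeSlideupMask(int NumElts, int SlideAmt) {
  std::vector<int> Mask(NumElts);
  for (int i = 0; i != NumElts; ++i)
    Mask[i] = i < SlideAmt ? -1 : i - SlideAmt;
  return Mask;
}

int main() {
  // slideup_v8i32 and slideup_v8f32 check "vslideup.vi v10, v8, 3";
  // the corresponding mask is:
  for (int M : makeSlideupMask(8, 3))
    std::printf("%d ", M); // prints: -1 -1 -1 0 1 2 3 4
  std::printf("\n");
}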