diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3266,9 +3266,9 @@
 
 /// Is this shuffle interleaving contiguous elements from one vector into the
 /// even elements and contiguous elements from another vector into the odd
-/// elements. \p Src1 will contain the element that should be in the first even
-/// element. \p Src2 will contain the element that should be in the first odd
-/// element. These can be the first element in a source or the element half
+/// elements. \p EvenSrc will contain the element that should be in the first
+/// even element. \p OddSrc will contain the element that should be in the first
+/// odd element. These can be the first element in a source or the element half
 /// way through the source.
 static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
                                 int &OddSrc, const RISCVSubtarget &Subtarget) {
@@ -3277,7 +3277,8 @@
     return false;
 
   int Size = Mask.size();
-  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+  int NumElts = VT.getVectorNumElements();
+  assert(Size == NumElts && "Unexpected mask size");
   SmallVector<unsigned, 2> StartIndexes;
   if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
     return false;
@@ -3290,6 +3291,19 @@
   if (EvenSrc != 0 && OddSrc != 0)
     return false;
 
+  // If both elements of the even and odd vector are smaller than the size of
+  // the VT, then it's a unary interleave like:
+  // (vector_shuffle <0,2,1,3> x:v4i8, y:v4i8)
+  // i.e. only the first operand is being interleaved.
+  // So the actual number of elements in Even and Odd is half of VT's.
+  if (EvenSrc < NumElts && OddSrc < NumElts)
+    NumElts /= 2;
+
+  // We need to make sure that the start indices of each vector are within range
+  // e.g. we can't interleave (vector_shuffle <0,3,1,4> x:v2i8, y:v2i8)
+  if (EvenSrc > NumElts || OddSrc > NumElts)
+    return false;
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -494,6 +494,31 @@
   ret <4 x i8> %a
 }
 
+; This shouldn't be interleaved
+define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) {
+; V128-LABEL: unary_interleave_v4i8_invalid:
+; V128:       # %bb.0:
+; V128-NEXT:    lui a0, %hi(.LCPI17_0)
+; V128-NEXT:    addi a0, a0, %lo(.LCPI17_0)
+; V128-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; V128-NEXT:    vle8.v v10, (a0)
+; V128-NEXT:    vrgather.vv v9, v8, v10
+; V128-NEXT:    vmv1r.v v8, v9
+; V128-NEXT:    ret
+;
+; V512-LABEL: unary_interleave_v4i8_invalid:
+; V512:       # %bb.0:
+; V512-NEXT:    lui a0, %hi(.LCPI17_0)
+; V512-NEXT:    addi a0, a0, %lo(.LCPI17_0)
+; V512-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; V512-NEXT:    vle8.v v10, (a0)
+; V512-NEXT:    vrgather.vv v9, v8, v10
+; V512-NEXT:    vmv1r.v v8, v9
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
+  ret <4 x i8> %a
+}
+
 define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) {
 ; V128-LABEL: unary_interleave_v4i16:
 ; V128:       # %bb.0:
@@ -548,8 +573,8 @@
 define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
 ; RV32-V128-LABEL: unary_interleave_v4i64:
 ; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI19_0)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI19_0)
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI20_0)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI20_0)
 ; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-V128-NEXT:    vle16.v v12, (a0)
 ; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v12
@@ -558,8 +583,8 @@
 ;
 ; 
RV64-V128-LABEL: unary_interleave_v4i64: ; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV64-V128-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI20_0) ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-V128-NEXT: vle64.v v12, (a0) ; RV64-V128-NEXT: vrgather.vv v10, v8, v12 @@ -568,8 +593,8 @@ ; ; RV32-V512-LABEL: unary_interleave_v4i64: ; RV32-V512: # %bb.0: -; RV32-V512-NEXT: lui a0, %hi(.LCPI19_0) -; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV32-V512-NEXT: lui a0, %hi(.LCPI20_0) +; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI20_0) ; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV32-V512-NEXT: vle16.v v10, (a0) ; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10 @@ -578,8 +603,8 @@ ; ; RV64-V512-LABEL: unary_interleave_v4i64: ; RV64-V512: # %bb.0: -; RV64-V512-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV64-V512-NEXT: lui a0, %hi(.LCPI20_0) +; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI20_0) ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-V512-NEXT: vle64.v v10, (a0) ; RV64-V512-NEXT: vrgather.vv v9, v8, v10