diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3267,9 +3267,9 @@ /// Is this shuffle interleaving contiguous elements from one vector into the /// even elements and contiguous elements from another vector into the odd -/// elements. \p Src1 will contain the element that should be in the first even -/// element. \p Src2 will contain the element that should be in the first odd -/// element. These can be the first element in a source or the element half +/// elements. \p EvenSrc will contain the element that should be in the first +/// even element. \p OddSrc will contain the element that should be in the first +/// odd element. These can be the first element in a source or the element half /// way through the source. static bool isInterleaveShuffle(ArrayRef Mask, MVT VT, int &EvenSrc, int &OddSrc, const RISCVSubtarget &Subtarget) { @@ -3278,7 +3278,8 @@ return false; int Size = Mask.size(); - assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); + int NumElts = VT.getVectorNumElements(); + assert(Size == (int)NumElts && "Unexpected mask size"); SmallVector StartIndexes; if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes)) @@ -3291,7 +3292,14 @@ if (EvenSrc != 0 && OddSrc != 0) return false; - return true; + // Subvectors will be extracted either from the start of the two input + // vectors, or from the start and middle of the first vector if it's a unary + // interleave. + // In both cases, HalfNumElts elements will be extracted. + // So make sure that EvenSrc/OddSrc are within range. 
+ int HalfNumElts = NumElts / 2; + return (((EvenSrc % NumElts) + HalfNumElts) <= NumElts) && + (((OddSrc % NumElts) + HalfNumElts) <= NumElts); } /// Match shuffles that concatenate two vectors, rotate the concatenation, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -172,6 +172,58 @@ ret <8 x i32> %a } +; %y should be slid down by 2 +define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) { +; V128-LABEL: interleave_v4i32_offset_2: +; V128: # %bb.0: +; V128-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; V128-NEXT: vslidedown.vi v10, v9, 2 +; V128-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; V128-NEXT: vwaddu.vv v9, v8, v10 +; V128-NEXT: li a0, -1 +; V128-NEXT: vwmaccu.vx v9, a0, v10 +; V128-NEXT: vmv1r.v v8, v9 +; V128-NEXT: ret +; +; V512-LABEL: interleave_v4i32_offset_2: +; V512: # %bb.0: +; V512-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; V512-NEXT: vslidedown.vi v10, v9, 2 +; V512-NEXT: vwaddu.vv v9, v8, v10 +; V512-NEXT: li a0, -1 +; V512-NEXT: vwmaccu.vx v9, a0, v10 +; V512-NEXT: vmv1r.v v8, v9 +; V512-NEXT: ret + %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + ret <4 x i32> %a +} + +; %y should be slid down by 1 +define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { +; V128-LABEL: interleave_v4i32_offset_1: +; V128: # %bb.0: +; V128-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; V128-NEXT: vslidedown.vi v10, v9, 1 +; V128-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; V128-NEXT: vwaddu.vv v9, v8, v10 +; V128-NEXT: li a0, -1 +; V128-NEXT: vwmaccu.vx v9, a0, v10 +; V128-NEXT: vmv1r.v v8, v9 +; V128-NEXT: ret +; +; V512-LABEL: interleave_v4i32_offset_1: +; V512: # %bb.0: +; V512-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; V512-NEXT: vslidedown.vi v10, v9, 1 +; V512-NEXT: vwaddu.vv v9, v8, v10 +; V512-NEXT: 
li a0, -1 +; V512-NEXT: vwmaccu.vx v9, a0, v10 +; V512-NEXT: vmv1r.v v8, v9 +; V512-NEXT: ret + %a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> + ret <4 x i32> %a +} + define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) { ; V128-LABEL: interleave_v8i8: ; V128: # %bb.0: @@ -362,8 +414,8 @@ ; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 ; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-V128-NEXT: lui a0, %hi(.LCPI15_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI15_0) +; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV32-V128-NEXT: li a1, 32 ; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-V128-NEXT: vle32.v v0, (a0) @@ -371,8 +423,8 @@ ; RV32-V128-NEXT: vrgather.vv v8, v24, v0 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: lui a0, %hi(.LCPI15_1) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI15_1) +; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) ; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 @@ -413,8 +465,8 @@ ; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 ; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-V128-NEXT: lui a0, %hi(.LCPI15_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI15_0) +; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV64-V128-NEXT: li a1, 32 ; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV64-V128-NEXT: vle32.v v0, (a0) @@ -422,8 +474,8 @@ ; RV64-V128-NEXT: vrgather.vv v8, v24, v0 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: lui a0, %hi(.LCPI15_1) -; RV64-V128-NEXT: addi a0, 
a0, %lo(.LCPI15_1) +; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) ; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 @@ -494,6 +546,31 @@ ret <4 x i8> %a } +; This shouldn't be interleaved +define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) { +; V128-LABEL: unary_interleave_v4i8_invalid: +; V128: # %bb.0: +; V128-NEXT: lui a0, %hi(.LCPI19_0) +; V128-NEXT: addi a0, a0, %lo(.LCPI19_0) +; V128-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; V128-NEXT: vle8.v v10, (a0) +; V128-NEXT: vrgather.vv v9, v8, v10 +; V128-NEXT: vmv1r.v v8, v9 +; V128-NEXT: ret +; +; V512-LABEL: unary_interleave_v4i8_invalid: +; V512: # %bb.0: +; V512-NEXT: lui a0, %hi(.LCPI19_0) +; V512-NEXT: addi a0, a0, %lo(.LCPI19_0) +; V512-NEXT: vsetivli zero, 4, e8, mf8, ta, ma +; V512-NEXT: vle8.v v10, (a0) +; V512-NEXT: vrgather.vv v9, v8, v10 +; V512-NEXT: vmv1r.v v8, v9 +; V512-NEXT: ret + %a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> + ret <4 x i8> %a +} + define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) { ; V128-LABEL: unary_interleave_v4i16: ; V128: # %bb.0: @@ -548,8 +625,8 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) { ; RV32-V128-LABEL: unary_interleave_v4i64: ; RV32-V128: # %bb.0: -; RV32-V128-NEXT: lui a0, %hi(.LCPI19_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV32-V128-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI22_0) ; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-V128-NEXT: vle16.v v12, (a0) ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12 @@ -558,8 +635,8 @@ ; ; RV64-V128-LABEL: unary_interleave_v4i64: ; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV64-V128-NEXT: lui a0, %hi(.LCPI22_0) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI22_0) ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-V128-NEXT: vle64.v v12, (a0) ; RV64-V128-NEXT: vrgather.vv 
v10, v8, v12 @@ -568,8 +645,8 @@ ; ; RV32-V512-LABEL: unary_interleave_v4i64: ; RV32-V512: # %bb.0: -; RV32-V512-NEXT: lui a0, %hi(.LCPI19_0) -; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV32-V512-NEXT: lui a0, %hi(.LCPI22_0) +; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI22_0) ; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV32-V512-NEXT: vle16.v v10, (a0) ; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10 @@ -578,8 +655,8 @@ ; ; RV64-V512-LABEL: unary_interleave_v4i64: ; RV64-V512: # %bb.0: -; RV64-V512-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI19_0) +; RV64-V512-NEXT: lui a0, %hi(.LCPI22_0) +; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI22_0) ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; RV64-V512-NEXT: vle64.v v10, (a0) ; RV64-V512-NEXT: vrgather.vv v9, v8, v10