diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -742,7 +742,8 @@
 
   /// Returns whether or not generating a interleaved load/store intrinsic for
   /// this type will be legal.
-  bool isLegalInterleavedAccessType(VectorType *, unsigned Factor,
+  bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
+                                    Align Alignment, unsigned AddrSpace,
                                     const DataLayout &) const;
 
   /// Return true if a stride load store of the given result type and
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16689,13 +16689,16 @@
 }
 
 bool RISCVTargetLowering::isLegalInterleavedAccessType(
-    VectorType *VTy, unsigned Factor, const DataLayout &DL) const {
+    VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
+    const DataLayout &DL) const {
   EVT VT = getValueType(DL, VTy);
   // Don't lower vlseg/vsseg for vector types that can't be split.
   if (!isTypeLegal(VT))
     return false;
 
-  if (!isLegalElementTypeForRVV(VT.getScalarType()))
+  if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
+      !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
+                                      Alignment))
     return false;
 
   MVT ContainerVT = VT.getSimpleVT();
@@ -16762,7 +16765,8 @@
   IRBuilder<> Builder(LI);
 
   auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
-  if (!isLegalInterleavedAccessType(VTy, Factor,
+  if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
+                                    LI->getPointerAddressSpace(),
                                     LI->getModule()->getDataLayout()))
     return false;
 
@@ -16815,7 +16819,8 @@
   // Given SVI : <n*factor x ty>, then VTy : <n x ty>
   auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
                                    ShuffleVTy->getNumElements() / Factor);
-  if (!isLegalInterleavedAccessType(VTy, Factor,
+  if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
+                                    SI->getPointerAddressSpace(),
                                     SI->getModule()->getDataLayout()))
     return false;
 
@@ -16859,7 +16864,8 @@
   VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
   VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
 
-  if (!isLegalInterleavedAccessType(ResVTy, Factor,
+  if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
+                                    LI->getPointerAddressSpace(),
                                     LI->getModule()->getDataLayout()))
     return false;
 
@@ -16908,7 +16914,8 @@
   VectorType *VTy = cast<VectorType>(II->getType());
   VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
 
-  if (!isLegalInterleavedAccessType(InVTy, Factor,
+  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
+                                    SI->getPointerAddressSpace(),
                                     SI->getModule()->getDataLayout()))
     return false;
 
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -471,7 +471,8 @@
       // it's getMemoryOpCost returns a really expensive cost for types like
       // <6 x i8>, which show up when doing interleaves of Factor=3 etc.
       // Should the memory op cost of these be cheaper?
-      if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, DL)) {
+      if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, Alignment,
+                                            AddressSpace, DL)) {
        InstructionCost LegalMemCost = getMemoryOpCost(
            Opcode, LegalFVTy, Alignment, AddressSpace, CostKind);
        return LT.first + LegalMemCost;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -82,12 +82,16 @@
   ret {<16 x i8>, <16 x i8>} %retval
 }
 
-; FIXME: Shouldn't be lowered to vlseg because it's unaligned
+; Shouldn't be lowered to vlseg because it's unaligned
 define {<8 x i16>, <8 x i16>} @vector_deinterleave_load_v8i16_v16i16_align1(ptr %p) {
-; CHECK-LABEL: vector_deinterleave_load_v8i16_v16i16:
+; CHECK-LABEL: vector_deinterleave_load_v8i16_v16i16_align1:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wi v9, v10, 16
 ; CHECK-NEXT:    ret
   %vec = load <16 x i16>, ptr %p, align 1
   %retval = call {<8 x i16>, <8 x i16>} @llvm.experimental.vector.deinterleave2.v16i16(<16 x i16> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll
@@ -29,12 +29,17 @@
   ret void
 }
 
-; FIXME: Shouldn't be lowered to vsseg because it's unaligned
+; Shouldn't be lowered to vsseg because it's unaligned
 define void @vector_interleave_store_v16i16_v8i16_align1(<8 x i16> %a, <8 x i16> %b, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_v16i16_v8i16_align1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a1, v9
+; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vse8.v v10, (a0)
 ; CHECK-NEXT:    ret
   %res = call <16 x i16> @llvm.experimental.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
   store <16 x i16> %res, ptr %p, align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -39,12 +39,14 @@
   ret {<vscale x 16 x i8>, <vscale x 16 x i8>} %retval
 }
 
-; FIXME: Shouldn't be lowered to vlseg because it's unaligned
+; Shouldn't be lowered to vlseg because it's unaligned
 define {<vscale x 8 x i16>, <vscale x 8 x i16>} @vector_deinterleave_load_nxv8i16_nxv16i16_align1(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_nxv8i16_nxv16i16_align1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vlseg2e16.v v8, (a0)
+; CHECK-NEXT:    vl4r.v v12, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wi v10, v12, 16
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 16 x i16>, ptr %p, align 1
   %retval = call {<vscale x 8 x i16>, <vscale x 8 x i16>} @llvm.experimental.vector.deinterleave2.nxv16i16(<vscale x 16 x i16> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll
@@ -32,12 +32,15 @@
   ret void
 }
 
-; FIXME: Shouldn't be lowered to vsseg because it's unaligned
+; Shouldn't be lowered to vsseg because it's unaligned
 define void @vector_interleave_store_nxv16i16_nxv8i16_align1(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, ptr %p) {
 ; CHECK-LABEL: vector_interleave_store_nxv16i16_nxv8i16_align1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT:    vsseg2e16.v v8, (a0)
+; CHECK-NEXT:    vwaddu.vv v12, v8, v10
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    vwmaccu.vx v12, a1, v10
+; CHECK-NEXT:    vs4r.v v12, (a0)
 ; CHECK-NEXT:    ret
   %res = call <vscale x 16 x i16> @llvm.experimental.vector.interleave2.nxv16i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
   store <vscale x 16 x i16> %res, ptr %p, align 1
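
Below is an illustrative sketch, not part of the patch above: it shows how a caller inside the RISC-V backend might query the extended hook for a candidate interleaved load. The helper name canUseSegmentedLoad and the way the RISCVTargetLowering reference is obtained are hypothetical; isLegalInterleavedAccessType and the LoadInst accessors are the APIs touched by this diff.

// Sketch only: assumes it lives inside the RISC-V backend, where
// "RISCVISelLowering.h" is visible; the helper itself is hypothetical.
#include "RISCVISelLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Returns true if an interleaved load with the given factor may be lowered to
// a segmented load (vlseg). With this change, the alignment and address space
// of the original access take part in the decision, so an under-aligned load
// is rejected here instead of being turned into a vlseg that assumes
// element-aligned memory.
static bool canUseSegmentedLoad(const RISCVTargetLowering &TLI,
                                FixedVectorType *VTy, unsigned Factor,
                                const LoadInst *LI) {
  return TLI.isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
                                          LI->getPointerAddressSpace(),
                                          LI->getModule()->getDataLayout());
}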