diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -339,6 +339,15 @@
                              Instruction *I = nullptr) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
+  // Interleaved-access lowering hooks; interleave factors up to 4 are accepted.
+  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+  bool isLegalInterleavedAccessType(VectorType *VecTy,
+                                    const DataLayout &DL) const;
+  bool lowerInterleavedLoad(LoadInst *LI,
+                            ArrayRef<ShuffleVectorInst *> Shuffles,
+                            ArrayRef<unsigned> Indices,
+                            unsigned Factor) const override;
+
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -1183,6 +1184,122 @@
   return isInt<12>(Imm);
 }
 
+bool RISCVTargetLowering::isLegalInterleavedAccessType(
+    VectorType *VecTy, const DataLayout &DL) const {
+  // Legal: >= 2 elements (minimum count if scalable), 8/16/32/64-bit elements,
+  // and for half/float/double elements the matching scalar FP extension.
+  Type *EltTy = VecTy->getElementType();
+  unsigned EltBits = DL.getTypeSizeInBits(EltTy);
+
+  unsigned EltCount;
+  if (auto *ScalableTy = dyn_cast<ScalableVectorType>(VecTy))
+    EltCount = ScalableTy->getMinNumElements();
+  else
+    EltCount = cast<FixedVectorType>(VecTy)->getNumElements();
+  if (EltCount <= 1)
+    return false;
+
+  bool SupportedWidth =
+      EltBits == 8 || EltBits == 16 || EltBits == 32 || EltBits == 64;
+  if (!SupportedWidth)
+    return false;
+
+  if (EltTy->isHalfTy())
+    return Subtarget.hasStdExtZfh();
+  if (EltTy->isFloatTy())
+    return Subtarget.hasStdExtF();
+  return !EltTy->isDoubleTy() || Subtarget.hasStdExtD();
+}
+
+bool RISCVTargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  // Lowers the de-interleaving Shuffles of the wide load LI to one
+  // riscv.seg<Factor>.load call emitted in front of LI. Every use of
+  // Shuffles[I] is redirected to field Indices[I] of the call's result.
+  // Factor is the interleave factor (number of fields per segment).
+  // Returns true once the IR has been rewritten, or false, leaving the IR
+  // untouched, when the access cannot be lowered this way.
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  const DataLayout &DL = LI->getModule()->getDataLayout();
+
+  // Only fixed-length results are handled below; bail out instead of letting
+  // a cast<> assert on anything else. Also require V and a legal type.
+  auto *FVTy = dyn_cast<FixedVectorType>(Shuffles[0]->getType());
+  if (!FVTy || !Subtarget.hasStdExtV() ||
+      !isLegalInterleavedAccessType(FVTy, DL))
+    return false;
+
+  // A segment access needs EMUL * NFIELDS <= 8, so all Factor fields together
+  // must fit in eight minimum-size vector registers, not each field alone.
+  // NOTE(review): assumes the field size is not rounded up further - confirm.
+  // FIXME: Support large vectors.
+  const unsigned NumElts = FVTy->getNumElements();
+  const uint64_t EltBits = DL.getTypeSizeInBits(FVTy->getElementType());
+  if (EltBits * NumElts * Factor > Subtarget.getMinRVVVectorSizeInBits() * 8)
+    return false;
+
+  // A pointer vector can not be the return type of the vlsegN intrinsics. Need
+  // to load integer vectors first and then convert to pointer vectors.
+  Type *EltTy = FVTy->getElementType();
+  if (EltTy->isPointerTy())
+    FVTy = FixedVectorType::get(DL.getIntPtrType(EltTy), NumElts);
+
+  IRBuilder<> Builder(LI);
+
+  // The segment intrinsic is overloaded on the field type, the pointer type
+  // and the XLEN-wide integer type used for the vector length operand.
+  Type *PtrTy =
+      FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
+  Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+  Type *Tys[] = {FVTy, PtrTy, XLenTy};
+  // Indexed by Factor - 2; Factor is asserted above to lie in [2, 4].
+  static const Intrinsic::ID VlsegInts[3] = {Intrinsic::riscv_seg2_load,
+                                             Intrinsic::riscv_seg3_load,
+                                             Intrinsic::riscv_seg4_load};
+  Function *VlsegNFunc =
+      Intrinsic::getDeclaration(LI->getModule(), VlsegInts[Factor - 2], Tys);
+
+  // Load all NumElts segments: the VL operand is the per-field element count.
+  Value *VL = ConstantInt::get(XLenTy, NumElts);
+  Value *BaseAddr = Builder.CreateBitCast(LI->getPointerOperand(), PtrTy);
+  CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, {BaseAddr, VL}, "vlsegN");
+
+  // Extract and store the sub-vectors returned by the load intrinsic.
+  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
+  for (unsigned I = 0, E = Shuffles.size(); I != E; ++I) {
+    ShuffleVectorInst *SVI = Shuffles[I];
+    Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[I]);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(
+          SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
+                                       NumElts));
+    SubVecs[SVI].push_back(SubVec);
+  }
+
+  // Replace uses of the shufflevector instructions with the sub-vectors
+  // returned by the load intrinsic. If a shufflevector instruction is
+  // associated with more than one sub-vector, those sub-vectors will be
+  // concatenated into a single wide vector.
+  for (ShuffleVectorInst *SVI : Shuffles) {
+    auto &SubVec = SubVecs[SVI];
+    auto *WideVec =
+        SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+    SVI->replaceAllUsesWith(WideVec);
+  }
+
+  // LI and the shuffles are not erased here; they are only stripped of the
+  // uses redirected above.
+  return true;
+}
+
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -162,6 +162,7 @@
   addPass(createRISCVGatherScatterLoweringPass());
 
   TargetPassConfig::addIRPasses();
+  addPass(createInterleavedAccessPass());
 }
 
 bool RISCVPassConfig::addInstSelector() {
diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll
@@ -31,6 +31,7 @@
 ; CHECK-NEXT:       Expand vector predication intrinsics
 ; CHECK-NEXT:       Scalarize Masked Memory Intrinsics
 ; CHECK-NEXT:       Expand reduction intrinsics
+; CHECK-NEXT:       Interleaved Access Pass
 ; CHECK-NEXT:       Exception handling preparation
 ; CHECK-NEXT:       Safe Stack instrumentation pass
 ; CHECK-NEXT:       Insert stack protectors
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -57,6 +57,7 @@
 ; CHECK-NEXT:       Expand reduction intrinsics
 ; CHECK-NEXT:       Natural Loop Information
 ; CHECK-NEXT:       TLS Variable Hoist
+; CHECK-NEXT:       Interleaved Access Pass
 ; CHECK-NEXT:       CodeGen Prepare
 ; CHECK-NEXT:       Dominator Tree Construction
 ; CHECK-NEXT:       Exception handling preparation
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple riscv64-linux-gnu -interleaved-access -mattr=+v -riscv-v-vector-bits-min=128 \
+; RUN:     -S < %s | FileCheck %s
+
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+; CHECK-LABEL: @load_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0i8.i64(i8* [[TMP1]], i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLSEGN]], 0
+; CHECK-NEXT:    ret <8 x i8> [[TMP2]]
+; Factor 2: both stride-2 shuffles of the <16 x i8> load become fields of one seg2 load; field 1 is returned.
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %v1
+}
+
+define <4 x i32> @load_factor3(<12 x i32>* %ptr) {
+; CHECK-LABEL: @load_factor3(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg3.load.v4i32.p0i32.i64(i32* [[TMP1]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 0
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+; Factor 3: the three stride-3 shuffles of the <12 x i32> load become fields of one seg3 load; field 2 is returned.
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  ret <4 x i32> %v2
+}
+
+define <4 x i32> @load_factor4(<16 x i32>* %ptr) {
+; CHECK-LABEL: @load_factor4(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.riscv.seg4.load.v4i32.p0i32.i64(i32* [[TMP1]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLSEGN]], 0
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+; Factor 4: the four stride-4 shuffles of the <16 x i32> load become fields of one seg4 load; field 3 is returned.
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret <4 x i32> %v3
+}
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg b/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'RISCV' in config.root.targets:
+  config.unsupported = True