diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -308,6 +308,15 @@
                              Instruction *I = nullptr) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
+
+  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+  bool isLegalInterleavedAccessType(VectorType *VecTy,
+                                    const DataLayout &DL) const;
+  bool lowerInterleavedLoad(LoadInst *LI,
+                            ArrayRef<ShuffleVectorInst *> Shuffles,
+                            ArrayRef<unsigned> Indices,
+                            unsigned Factor) const override;
+
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
   bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
   bool isZExtFree(SDValue Val, EVT VT2) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -20,6 +20,7 @@
 #include "RISCVTargetMachine.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,8 +29,8 @@
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
-#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
@@ -953,6 +954,143 @@
   return isInt<12>(Imm);
 }
 
+bool RISCVTargetLowering::isLegalInterleavedAccessType(
+    VectorType *VecTy, const DataLayout &DL) const {
+
+  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+
+  unsigned NumElements =
+      isa<ScalableVectorType>(VecTy)
+          ? cast<ScalableVectorType>(VecTy)->getMinNumElements()
+          : cast<FixedVectorType>(VecTy)->getNumElements();
+  // Ensure the number of vector elements is greater than 1.
+  if (NumElements < 2)
+    return false;
+
+  // Ensure the element type is legal.
+  if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
+    return false;
+
+  if (VecTy->getElementType()->isHalfTy())
+    return Subtarget.hasStdExtZfh();
+  if (VecTy->getElementType()->isFloatTy())
+    return Subtarget.hasStdExtF();
+  if (VecTy->getElementType()->isDoubleTy())
+    return Subtarget.hasStdExtD();
+
+  return true;
+}
+
+bool RISCVTargetLowering::lowerInterleavedLoad(
+    LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+    ArrayRef<unsigned> Indices, unsigned Factor) const {
+  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+         "Invalid interleave factor");
+  assert(!Shuffles.empty() && "Empty shufflevector input");
+  assert(Shuffles.size() == Indices.size() &&
+         "Unmatched number of shufflevectors and indices");
+
+  const DataLayout &DL = LI->getModule()->getDataLayout();
+
+  VectorType *VTy = Shuffles[0]->getType();
+
+  // Skip if we do not have StdExtV and skip illegal vector types.
+  if (!Subtarget.hasStdExtV() || !isLegalInterleavedAccessType(VTy, DL))
+    return false;
+
+  ScalableVectorType *SVTy;
+  bool IsScalableVector = isa<ScalableVectorType>(VTy);
+  if (!IsScalableVector) {
+    auto *FVTy = cast<FixedVectorType>(VTy);
+    SVTy =
+        ScalableVectorType::get(FVTy->getElementType(), FVTy->getNumElements());
+  } else {
+    SVTy = cast<ScalableVectorType>(VTy);
+  }
+
+  // A pointer vector cannot be the return type of the vlsegN intrinsics. Need
+  // to load integer vectors first and then convert to pointer vectors.
+  Type *EltTy = SVTy->getElementType();
+  if (EltTy->isPointerTy())
+    SVTy = ScalableVectorType::get(DL.getIntPtrType(EltTy),
+                                   SVTy->getMinNumElements());
+
+  IRBuilder<> Builder(LI);
+
+  // The base address of the load.
+  Value *BaseAddr = LI->getPointerOperand();
+
+  Type *PtrTy =
+      SVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace());
+  Type *IntTy;
+  Triple T = getTargetMachine().getTargetTriple();
+  if (T.isArch64Bit())
+    IntTy = IntegerType::get(SVTy->getContext(), 64);
+  else {
+    assert(T.isArch32Bit() && "only RV32 and RV64 are currently supported");
+    IntTy = IntegerType::get(SVTy->getContext(), 32);
+  }
+
+  Type *Tys[] = {SVTy, IntTy};
+  static const Intrinsic::ID VlsegInts[3] = {Intrinsic::riscv_vlseg2,
+                                             Intrinsic::riscv_vlseg3,
+                                             Intrinsic::riscv_vlseg4};
+  Function *VlsegNFunc =
+      Intrinsic::getDeclaration(LI->getModule(), VlsegInts[Factor - 2], Tys);
+
+  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
+
+  Value *VL;
+  if (!IsScalableVector)
+    VL = ConstantInt::get(IntTy, cast<FixedVectorType>(VTy)->getNumElements());
+  else {
+    Function *VscaleFunc =
+        Intrinsic::getDeclaration(LI->getModule(), Intrinsic::vscale, IntTy);
+    VL = Builder.CreateCall(VscaleFunc, {}, "vscale");
+  }
+  CallInst *VlsegN = Builder.CreateCall(
+      VlsegNFunc, {Builder.CreateBitCast(BaseAddr, PtrTy), VL}, "vlsegN");
+
+  // Extract and store the sub-vectors returned by the load intrinsic.
+  for (unsigned I = 0; I < Shuffles.size(); I++) {
+    ShuffleVectorInst *SVI = Shuffles[I];
+    unsigned Index = Indices[I];
+
+    Value *SubVec = Builder.CreateExtractValue(VlsegN, Index);
+
+    // Convert the integer vector to pointer vector if the element is pointer.
+    if (EltTy->isPointerTy())
+      SubVec = Builder.CreateIntToPtr(
+          SubVec, ScalableVectorType::get(SVI->getType()->getElementType(),
+                                          SVTy->getMinNumElements()));
+
+    if (!IsScalableVector) {
+      Type *IntrinsicTypes[] = {cast<FixedVectorType>(VTy), SubVec->getType()};
+      Function *VExtractFunc = Intrinsic::getDeclaration(
+          LI->getModule(), Intrinsic::experimental_vector_extract,
+          IntrinsicTypes);
+      Value *ExtractSubVec = Builder.CreateCall(
+          VExtractFunc, {SubVec, ConstantInt::get(IntTy, 0)}, "");
+      SubVecs[SVI].push_back(ExtractSubVec);
+    } else {
+      SubVecs[SVI].push_back(SubVec);
+    }
+  }
+
+  // Replace uses of the shufflevector instructions with the sub-vectors
+  // returned by the load intrinsic. If a shufflevector instruction is
+  // associated with more than one sub-vector, those sub-vectors will be
+  // concatenated into a single wide vector.
+  for (ShuffleVectorInst *SVI : Shuffles) {
+    auto &SubVec = SubVecs[SVI];
+    auto *WideVec =
+        SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
+    SVI->replaceAllUsesWith(WideVec);
+  }
+
+  return true;
+}
+
 // On RV32, 64-bit integers are split into their high and low parts and held
 // in two different registers, so the trunc is free since the low register can
 // just be used.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -150,6 +150,7 @@
 void RISCVPassConfig::addIRPasses() {
   addPass(createAtomicExpandPass());
   TargetPassConfig::addIRPasses();
+  addPass(createInterleavedAccessPass());
 }
 
 bool RISCVPassConfig::addInstSelector() {
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/interleaved-accesses.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -mtriple riscv64-linux-gnu -interleaved-access \
+; RUN:   -mattr=+experimental-v -S < %s | FileCheck %s
+
+define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
+; CHECK-LABEL: @load_factor2(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8>* [[PTR:%.*]] to i8*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.riscv.vlseg2.nxv8i8.i64(i8* [[TMP1]], i64 8)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8> } [[VLSEGN]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i8> @llvm.experimental.vector.extract.v8i8.nxv8i8(<vscale x 8 x i8> [[TMP4]], i64 0)
+; CHECK-NEXT:    ret <8 x i8> [[TMP3]]
+;
+  %interleaved.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
+  %v0 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %v1 = shufflevector <16 x i8> %interleaved.vec, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i8> %v1
+}
+
+define <4 x i32> @load_factor3(<12 x i32>* %ptr) {
+; CHECK-LABEL: @load_factor3(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <12 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.riscv.vlseg3.nxv4i32.i64(i32* [[TMP1]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP6]], i64 0)
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %interleaved.vec = load <12 x i32>, <12 x i32>* %ptr, align 4
+  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
+  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
+  ret <4 x i32> %v2
+}
+
+define <4 x i32> @load_factor4(<16 x i32>* %ptr) {
+; CHECK-LABEL: @load_factor4(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i32>* [[PTR:%.*]] to i32*
+; CHECK-NEXT:    [[VLSEGN:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.riscv.vlseg4.nxv4i32.i64(i32* [[TMP1]], i64 4)
+; CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 3
+; CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP2]], i64 0)
+; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0)
+; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 1
+; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP6]], i64 0)
+; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[VLSEGN]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> [[TMP8]], i64 0)
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+;
+  %interleaved.vec = load <16 x i32>, <16 x i32>* %ptr, align 4
+  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
+  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
+  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
+  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
+  ret <4 x i32> %v3
+}
diff --git a/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg b/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InterleavedAccess/RISCV/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'RISCV' in config.root.targets:
+    config.unsupported = True