diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
--- a/clang/include/clang/Basic/RISCVVTypes.def
+++ b/clang/include/clang/Basic/RISCVVTypes.def
@@ -60,6 +60,12 @@
   RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, false, true)
 #endif
 
+#ifndef RVV_VECTOR_TUPLE_INT
+// Fallback: forward tuple entries to RVV_VECTOR_TYPE_INT with the same arguments.
+#define RVV_VECTOR_TUPLE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned) \
+  RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned)
+#endif
+
 //===- Vector types -------------------------------------------------------===//
 
 RVV_VECTOR_TYPE_INT("__rvv_int8mf8_t", RvvInt8mf8, RvvInt8mf8Ty, 1,   8, 1, true)
@@ -140,6 +146,11 @@
 RVV_PREDICATE_TYPE("__rvv_bool32_t", RvvBool32, RvvBool32Ty, 2)
 RVV_PREDICATE_TYPE("__rvv_bool64_t", RvvBool64, RvvBool64Ty, 1)
 
+//===- Tuple vector types -------------------------------------------------===//
+
+RVV_VECTOR_TUPLE_INT("__rvv_int32m1x2_t", RvvInt32m1x2, RvvInt32m1x2Ty, 2,  32, 2, true)
+
+#undef RVV_VECTOR_TUPLE_INT
 #undef RVV_VECTOR_TYPE_FLOAT
 #undef RVV_VECTOR_TYPE_INT
 #undef RVV_VECTOR_TYPE
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3055,30 +3055,54 @@
       llvm::StructType *STy = dyn_cast<llvm::StructType>(ArgI.getCoerceToType());
       if (ArgI.isDirect() && ArgI.getCanBeFlattened() && STy &&
           STy->getNumElements() > 1) {
-        uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
-        llvm::Type *DstTy = Ptr.getElementType();
-        uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+        if (STy->containsScalableVectorType()) {
+          // Each struct element arrives as its own IR argument; rebuild the
+          // aggregate with insertvalue so it can be stored through Ptr at once.
+          assert(STy->containsHomogeneousScalableVectorTypes() &&
+                 "ABI only supports structure with homogeneous scalable vector "
+                 "type");
+          assert(CGM.getDataLayout().getTypeAllocSize(STy) ==
+                     CGM.getDataLayout().getTypeAllocSize(
+                         Ptr.getElementType()) &&
+                 "Only allow non-fractional movement of structure with "
+                 "homogeneous scalable vector type");
+          assert(STy->getNumElements() == NumIRArgs);
+          // NOTE(review): this load only seeds the insertvalue chain and every
+          // element is replaced below; a poison seed may suffice - confirm.
+          llvm::Value *LoadedStructValue = CreateCoercedLoad(Ptr, STy, *this);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto *AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            LoadedStructValue =
+                Builder.CreateInsertValue(LoadedStructValue, AI, i);
+          }
 
-        Address AddrToStoreInto = Address::invalid();
-        if (SrcSize <= DstSize) {
-          AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          Builder.CreateStore(LoadedStructValue, Ptr);
         } else {
-          AddrToStoreInto =
-            CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
-        }
+          uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(STy);
+          llvm::Type *DstTy = Ptr.getElementType();
+          uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(DstTy);
+
+          Address AddrToStoreInto = Address::invalid();
+          if (SrcSize <= DstSize) {
+            AddrToStoreInto = Builder.CreateElementBitCast(Ptr, STy);
+          } else {
+            AddrToStoreInto =
+                CreateTempAlloca(STy, Alloca.getAlignment(), "coerce");
+          }
 
-        assert(STy->getNumElements() == NumIRArgs);
-        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
-          auto AI = Fn->getArg(FirstIRArg + i);
-          AI->setName(Arg->getName() + ".coerce" + Twine(i));
-          Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
-          Builder.CreateStore(AI, EltPtr);
-        }
+          assert(STy->getNumElements() == NumIRArgs);
+          for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+            auto AI = Fn->getArg(FirstIRArg + i);
+            AI->setName(Arg->getName() + ".coerce" + Twine(i));
+            Address EltPtr = Builder.CreateStructGEP(AddrToStoreInto, i);
+            Builder.CreateStore(AI, EltPtr);
+          }
 
-        if (SrcSize > DstSize) {
-          Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          if (SrcSize > DstSize) {
+            Builder.CreateMemCpy(Ptr, AddrToStoreInto, DstSize);
+          }
         }
-
       } else {
         // Simple case, just do a coerced store of the argument into the alloca.
         assert(NumIRArgs == 1);
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -628,15 +628,26 @@
         llvm::FixedVectorType::get(ConvertType(Context.BoolTy), Size); \
       break;
 #include "clang/Basic/PPCTypes.def"
-#define RVV_TYPE(Name, Id, SingletonId) case BuiltinType::Id:
+    // RVV_TYPE entries: one scalable vector, min count scaled by NumVectors.
+#define RVV_TYPE(Name, Id, SingletonId)                                        \
+  case BuiltinType::Id: {                                                      \
+    ASTContext::BuiltinVectorTypeInfo Info =                                   \
+        Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));               \
+    return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),        \
+                                         Info.EC.getKnownMinValue() *          \
+                                             Info.NumVectors);                 \
+  }
+    // Tuple entries: a struct of NumVectors identical scalable vectors.
+#define RVV_VECTOR_TUPLE_INT(Name, Id, SingletonId, ...)                       \
+  case BuiltinType::Id: {                                                      \
+    ASTContext::BuiltinVectorTypeInfo Info =                                   \
+        Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));               \
+    llvm::Type *EltTy = llvm::ScalableVectorType::get(                         \
+        ConvertType(Info.ElementType), Info.EC.getKnownMinValue());            \
+    llvm::SmallVector<llvm::Type *, 4> EltTys(Info.NumVectors, EltTy);         \
+    return llvm::StructType::get(getLLVMContext(), EltTys);                    \
+  }
 #include "clang/Basic/RISCVVTypes.def"
-    {
-      ASTContext::BuiltinVectorTypeInfo Info =
-          Context.getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
-      return llvm::ScalableVectorType::get(ConvertType(Info.ElementType),
-                                           Info.EC.getKnownMinValue() *
-                                           Info.NumVectors);
-    }
 #define WASM_REF_TYPE(Name, MangledName, Id, SingletonId, AS)                  \
   case BuiltinType::Id: {                                                      \
     if (BuiltinType::Id == BuiltinType::WasmExternRef)                         \
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type-0.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type-0.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type-0.c
@@ -0,0 +1,65 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -O0 \
+// RUN:   -emit-llvm %s -o - | FileCheck %s
+
+#include <riscv_vector.h>
+
+// Declare local variable
+// CHECK-LABEL: define dso_local void @foo
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// CHECK-NEXT:    ret void
+//
+void foo() {
+  __rvv_int32m1x2_t v_tuple;
+}
+
+// Declare local variable and return
+// CHECK-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]]
+//
+__rvv_int32m1x2_t bar() {
+  __rvv_int32m1x2_t v_tuple;
+  return v_tuple;
+}
+
+// Pass as function parameter
+// CHECK-LABEL: define dso_local void @baz
+// CHECK-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// CHECK-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// CHECK-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// CHECK-NEXT:    ret void
+//
+void baz(__rvv_int32m1x2_t v_tuple) {
+}
+
+// Pass as function parameter and return
+// CHECK-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
+// CHECK-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[V_TUPLE:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// CHECK-NEXT:    [[V_TUPLE_ADDR:%.*]] = alloca { <vscale x 2 x i32>, <vscale x 2 x i32> }, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// CHECK-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP2]], ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    [[V_TUPLE1:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE]], align 4
+// CHECK-NEXT:    store { <vscale x 2 x i32>, <vscale x 2 x i32> } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load { <vscale x 2 x i32>, <vscale x 2 x i32> }, ptr [[V_TUPLE_ADDR]], align 4
+// CHECK-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP3]]
+//
+__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
+  return v_tuple;
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type-1.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type-1.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type-1.c
@@ -0,0 +1,49 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve32x -disable-O0-optnone \
+// RUN:   -emit-llvm %s -o - | opt -S -passes=mem2reg | FileCheck %s
+
+#include <riscv_vector.h>
+
+// Declare local variable
+// CHECK-LABEL: define dso_local void @foo
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret void
+//
+void foo() {
+  __rvv_int32m1x2_t v_tuple;
+}
+
+// Declare local variable and return
+// CHECK-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @bar
+// CHECK-SAME: () #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } undef
+//
+__rvv_int32m1x2_t bar() {
+  __rvv_int32m1x2_t v_tuple;
+  return v_tuple;
+}
+
+// Pass as function parameter
+// CHECK-LABEL: define dso_local void @baz
+// CHECK-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// CHECK-NEXT:    ret void
+//
+void baz(__rvv_int32m1x2_t v_tuple) {
+}
+
+// Pass as function parameter and return
+// CHECK-LABEL: define dso_local { <vscale x 2 x i32>, <vscale x 2 x i32> } @qux
+// CHECK-SAME: (<vscale x 2 x i32> [[V_TUPLE_COERCE0:%.*]], <vscale x 2 x i32> [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } undef, <vscale x 2 x i32> [[V_TUPLE_COERCE0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP0]], <vscale x 2 x i32> [[V_TUPLE_COERCE1]], 1
+// CHECK-NEXT:    ret { <vscale x 2 x i32>, <vscale x 2 x i32> } [[TMP1]]
+//
+__rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) {
+  return v_tuple;
+}
diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c
--- a/clang/test/Sema/riscv-types.c
+++ b/clang/test/Sema/riscv-types.c
@@ -133,6 +133,9 @@
 
   // CHECK: __rvv_int8mf2_t x43;
   __rvv_int8mf2_t x43;
+
+  // CHECK: __rvv_int32m1x2_t x44;
+  __rvv_int32m1x2_t x44;
 }
 
 typedef __rvv_bool4_t vbool4_t;