Index: lib/Target/RISCV/CMakeLists.txt
===================================================================
--- lib/Target/RISCV/CMakeLists.txt
+++ lib/Target/RISCV/CMakeLists.txt
@@ -2,6 +2,7 @@
 
 tablegen(LLVM RISCVGenAsmMatcher.inc -gen-asm-matcher)
 tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM RISCVGenCallingConv.inc -gen-callingconv)
 tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter)
 tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel)
 tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler)
Index: lib/Target/RISCV/RISCVCallingConv.td
===================================================================
--- lib/Target/RISCV/RISCVCallingConv.td
+++ lib/Target/RISCV/RISCVCallingConv.td
@@ -13,6 +13,25 @@
 // The RISC-V calling convention is handled with custom code in
 // RISCVISelLowering.cpp (CC_RISCV).
 
+def CC_RISCV_FastCC : CallingConv<[
+  // X5 and X6 are skipped: they might be used for the save-restore libcalls.
+  CCIfType<[i32, i64], CCAssignToReg<[X10, X11, X12, X13, X14, X15, X16, X17,
+                                      X7, X28, X29, X30, X31]>>,
+
+  CCIfType<[f32], CCAssignToReg<[F10_F, F11_F, F12_F, F13_F, F14_F, F15_F,
+                                 F16_F, F17_F, F0_F, F1_F, F2_F, F3_F,
+                                 F4_F, F5_F, F6_F, F7_F, F28_F, F29_F,
+                                 F30_F, F31_F]>>,
+
+  CCIfType<[f64], CCAssignToReg<[F10_D, F11_D, F12_D, F13_D, F14_D, F15_D,
+                                 F16_D, F17_D, F0_D, F1_D, F2_D, F3_D,
+                                 F4_D, F5_D, F6_D, F7_D, F28_D, F29_D,
+                                 F30_D, F31_D]>>,
+
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
+]>;
+
 def CSR_ILP32_LP64
     : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>;
 
Index: lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.cpp
+++ lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1796,6 +1796,8 @@
   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
 }
 
+#include "RISCVGenCallingConv.inc"
+
 // Transform physical registers into virtual registers.
 SDValue RISCVTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
@@ -1835,7 +1837,12 @@
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
+
+  if (CallConv == CallingConv::Fast) {
+    CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
+  } else {
+    analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
+  }
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
@@ -2035,7 +2042,12 @@
   // Analyze the operands of the call, assigning locations to each operand.
   SmallVector<CCValAssign, 16> ArgLocs;
   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-  analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
+
+  if (CallConv == CallingConv::Fast) {
+    ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
+  } else {
+    analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
+  }
 
   // Check if it's really possible to do a tail call.
   if (IsTailCall)
Index: test/CodeGen/RISCV/fastcc-float.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/fastcc-float.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \
+; RUN:     | FileCheck %s
+
+define fastcc float @callee(<32 x float> %A) nounwind {
+; CHECK-LABEL: callee:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fmv.x.w a0, fa0
+; CHECK-NEXT:    ret
+	%B = extractelement <32 x float> %A, i32 0
+	ret float %B
+}
+
+; With the fastcc, arguments will be passed in fa0-fa7 and ft0-ft11.
+; The rest will be pushed on the stack.
+define float @caller(<32 x float> %A) nounwind {
+; CHECK-LABEL: caller:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -64
+; CHECK-NEXT:    sw ra, 60(sp)
+; CHECK-NEXT:    flw fa0, 0(a0)
+; CHECK-NEXT:    flw fa1, 4(a0)
+; CHECK-NEXT:    flw fa2, 8(a0)
+; CHECK-NEXT:    flw fa3, 12(a0)
+; CHECK-NEXT:    flw fa4, 16(a0)
+; CHECK-NEXT:    flw fa5, 20(a0)
+; CHECK-NEXT:    flw fa6, 24(a0)
+; CHECK-NEXT:    flw fa7, 28(a0)
+; CHECK-NEXT:    flw ft0, 32(a0)
+; CHECK-NEXT:    flw ft1, 36(a0)
+; CHECK-NEXT:    flw ft2, 40(a0)
+; CHECK-NEXT:    flw ft3, 44(a0)
+; CHECK-NEXT:    flw ft4, 48(a0)
+; CHECK-NEXT:    flw ft5, 52(a0)
+; CHECK-NEXT:    flw ft6, 56(a0)
+; CHECK-NEXT:    flw ft7, 60(a0)
+; CHECK-NEXT:    flw ft8, 64(a0)
+; CHECK-NEXT:    flw ft9, 68(a0)
+; CHECK-NEXT:    flw ft10, 72(a0)
+; CHECK-NEXT:    flw ft11, 76(a0)
+; CHECK-NEXT:    flw fs0, 80(a0)
+; CHECK-NEXT:    flw fs1, 84(a0)
+; CHECK-NEXT:    flw fs2, 88(a0)
+; CHECK-NEXT:    flw fs3, 92(a0)
+; CHECK-NEXT:    flw fs4, 96(a0)
+; CHECK-NEXT:    flw fs5, 100(a0)
+; CHECK-NEXT:    flw fs6, 104(a0)
+; CHECK-NEXT:    flw fs7, 108(a0)
+; CHECK-NEXT:    flw fs8, 112(a0)
+; CHECK-NEXT:    flw fs9, 116(a0)
+; CHECK-NEXT:    flw fs10, 120(a0)
+; CHECK-NEXT:    flw fs11, 124(a0)
+; CHECK-NEXT:    fsw fs11, 44(sp)
+; CHECK-NEXT:    fsw fs10, 40(sp)
+; CHECK-NEXT:    fsw fs9, 36(sp)
+; CHECK-NEXT:    fsw fs8, 32(sp)
+; CHECK-NEXT:    fsw fs7, 28(sp)
+; CHECK-NEXT:    fsw fs6, 24(sp)
+; CHECK-NEXT:    fsw fs5, 20(sp)
+; CHECK-NEXT:    fsw fs4, 16(sp)
+; CHECK-NEXT:    fsw fs3, 12(sp)
+; CHECK-NEXT:    fsw fs2, 8(sp)
+; CHECK-NEXT:    fsw fs1, 4(sp)
+; CHECK-NEXT:    fsw fs0, 0(sp)
+; CHECK-NEXT:    call callee
+; CHECK-NEXT:    lw ra, 60(sp)
+; CHECK-NEXT:    addi sp, sp, 64
+; CHECK-NEXT:    ret
+	%C = call fastcc float @callee(<32 x float> %A)
+	ret float %C
+}
Index: test/CodeGen/RISCV/fastcc-int.ll
===================================================================
--- /dev/null
+++ test/CodeGen/RISCV/fastcc-int.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32 %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64 %s
+
+define fastcc i32 @callee(<16 x i32> %A) nounwind {
+; RV32-LABEL: callee:
+; RV32:       # %bb.0:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: callee:
+; RV64:       # %bb.0:
+; RV64-NEXT:    ret
+	%B = extractelement <16 x i32> %A, i32 0
+	ret i32 %B
+}
+
+; With the fastcc, arguments will be passed in a0-a7 and t2-t6.
+; The rest will be pushed on the stack.
+define i32 @caller(<16 x i32> %A) nounwind {
+; RV32-LABEL: caller:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -32
+; RV32-NEXT:    sw ra, 28(sp)
+; RV32-NEXT:    sw s0, 24(sp)
+; RV32-NEXT:    lw t0, 0(a0)
+; RV32-NEXT:    lw a1, 4(a0)
+; RV32-NEXT:    lw a2, 8(a0)
+; RV32-NEXT:    lw a3, 12(a0)
+; RV32-NEXT:    lw a4, 16(a0)
+; RV32-NEXT:    lw a5, 20(a0)
+; RV32-NEXT:    lw a6, 24(a0)
+; RV32-NEXT:    lw a7, 28(a0)
+; RV32-NEXT:    lw t2, 32(a0)
+; RV32-NEXT:    lw t3, 36(a0)
+; RV32-NEXT:    lw t4, 40(a0)
+; RV32-NEXT:    lw t5, 44(a0)
+; RV32-NEXT:    lw t6, 48(a0)
+; RV32-NEXT:    lw t1, 52(a0)
+; RV32-NEXT:    lw s0, 56(a0)
+; RV32-NEXT:    lw a0, 60(a0)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    sw s0, 4(sp)
+; RV32-NEXT:    sw t1, 0(sp)
+; RV32-NEXT:    mv a0, t0
+; RV32-NEXT:    call callee
+; RV32-NEXT:    lw s0, 24(sp)
+; RV32-NEXT:    lw ra, 28(sp)
+; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: caller:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -48
+; RV64-NEXT:    sd ra, 40(sp)
+; RV64-NEXT:    sd s0, 32(sp)
+; RV64-NEXT:    ld t0, 0(a0)
+; RV64-NEXT:    ld a1, 8(a0)
+; RV64-NEXT:    ld a2, 16(a0)
+; RV64-NEXT:    ld a3, 24(a0)
+; RV64-NEXT:    ld a4, 32(a0)
+; RV64-NEXT:    ld a5, 40(a0)
+; RV64-NEXT:    ld a6, 48(a0)
+; RV64-NEXT:    ld a7, 56(a0)
+; RV64-NEXT:    ld t2, 64(a0)
+; RV64-NEXT:    ld t3, 72(a0)
+; RV64-NEXT:    ld t4, 80(a0)
+; RV64-NEXT:    ld t5, 88(a0)
+; RV64-NEXT:    ld t6, 96(a0)
+; RV64-NEXT:    ld t1, 104(a0)
+; RV64-NEXT:    ld s0, 112(a0)
+; RV64-NEXT:    ld a0, 120(a0)
+; RV64-NEXT:    sd a0, 16(sp)
+; RV64-NEXT:    sd s0, 8(sp)
+; RV64-NEXT:    sd t1, 0(sp)
+; RV64-NEXT:    mv a0, t0
+; RV64-NEXT:    call callee
+; RV64-NEXT:    ld s0, 32(sp)
+; RV64-NEXT:    ld ra, 40(sp)
+; RV64-NEXT:    addi sp, sp, 48
+; RV64-NEXT:    ret
+	%C = call fastcc i32 @callee(<16 x i32> %A)
+	ret i32 %C
+}