Index: lib/Target/RISCV/CMakeLists.txt =================================================================== --- lib/Target/RISCV/CMakeLists.txt +++ lib/Target/RISCV/CMakeLists.txt @@ -2,6 +2,7 @@ tablegen(LLVM RISCVGenAsmMatcher.inc -gen-asm-matcher) tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM RISCVGenCallingConv.inc -gen-callingconv) tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter) tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel) tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler) Index: lib/Target/RISCV/RISCVCallingConv.td =================================================================== --- lib/Target/RISCV/RISCVCallingConv.td +++ lib/Target/RISCV/RISCVCallingConv.td @@ -13,6 +13,25 @@ // The RISC-V calling convention is handled with custom code in // RISCVISelLowering.cpp (CC_RISCV). +def CC_RISCV_FastCC : CallingConv<[ + // X5 and X6 might be used for save-restore libcall. + CCIfType<[i32, i64], CCAssignToReg<[X10, X11, X12, X13, X14, X15, X16, X17, + X7, X28, X29, X30, X31]>>, + + CCIfType<[f32], CCAssignToReg<[F10_F, F11_F, F12_F, F13_F, F14_F, F15_F, + F16_F, F17_F, F0_F, F1_F, F2_F, F3_F, + F4_F, F5_F, F6_F, F7_F, F28_F, F29_F, + F30_F, F31_F]>>, + + CCIfType<[f64], CCAssignToReg<[F10_D, F11_D, F12_D, F13_D, F14_D, F15_D, + F16_D, F17_D, F0_D, F1_D, F2_D, F3_D, + F4_D, F5_D, F6_D, F7_D, F28_D, F29_D, + F30_D, F31_D]>>, + + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64, f64], CCAssignToStack<8, 8>> +]>; + def CSR_ILP32_LP64 : CalleeSavedRegs<(add X1, X3, X4, X8, X9, (sequence "X%u", 18, 27))>; Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -1796,6 +1796,8 @@ return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi); } +#include "RISCVGenCallingConv.inc" + // Transform physical registers into virtual registers. 
SDValue RISCVTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, @@ -1835,7 +1837,11 @@ // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); + + if (CallConv == CallingConv::Fast) + CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC); + else + analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2035,7 +2041,11 @@ // Analyze the operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); + + if (CallConv == CallingConv::Fast) + ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC); + else + analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI); // Check if it's really possible to do a tail call. if (IsTailCall) Index: test/CodeGen/RISCV/fastcc-float.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/fastcc-float.ll @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f,+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define fastcc float @callee(<32 x float> %A) nounwind { +; CHECK-LABEL: callee: +; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a0, fa0 +; CHECK-NEXT: ret + %B = extractelement <32 x float> %A, i32 0 + ret float %B +} + +; With the fastcc, arguments will be passed by fa0-fa7 and ft0-ft11. +; The rest will be pushed on the stack. 
+define float @caller(<32 x float> %A) nounwind { +; CHECK-LABEL: caller: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -64 +; CHECK-NEXT: sw ra, 60(sp) +; CHECK-NEXT: flw fa0, 0(a0) +; CHECK-NEXT: flw fa1, 4(a0) +; CHECK-NEXT: flw fa2, 8(a0) +; CHECK-NEXT: flw fa3, 12(a0) +; CHECK-NEXT: flw fa4, 16(a0) +; CHECK-NEXT: flw fa5, 20(a0) +; CHECK-NEXT: flw fa6, 24(a0) +; CHECK-NEXT: flw fa7, 28(a0) +; CHECK-NEXT: flw ft0, 32(a0) +; CHECK-NEXT: flw ft1, 36(a0) +; CHECK-NEXT: flw ft2, 40(a0) +; CHECK-NEXT: flw ft3, 44(a0) +; CHECK-NEXT: flw ft4, 48(a0) +; CHECK-NEXT: flw ft5, 52(a0) +; CHECK-NEXT: flw ft6, 56(a0) +; CHECK-NEXT: flw ft7, 60(a0) +; CHECK-NEXT: flw ft8, 64(a0) +; CHECK-NEXT: flw ft9, 68(a0) +; CHECK-NEXT: flw ft10, 72(a0) +; CHECK-NEXT: flw ft11, 76(a0) +; CHECK-NEXT: flw fs0, 80(a0) +; CHECK-NEXT: flw fs1, 84(a0) +; CHECK-NEXT: flw fs2, 88(a0) +; CHECK-NEXT: flw fs3, 92(a0) +; CHECK-NEXT: flw fs4, 96(a0) +; CHECK-NEXT: flw fs5, 100(a0) +; CHECK-NEXT: flw fs6, 104(a0) +; CHECK-NEXT: flw fs7, 108(a0) +; CHECK-NEXT: flw fs8, 112(a0) +; CHECK-NEXT: flw fs9, 116(a0) +; CHECK-NEXT: flw fs10, 120(a0) +; CHECK-NEXT: flw fs11, 124(a0) +; CHECK-NEXT: fsw fs11, 44(sp) +; CHECK-NEXT: fsw fs10, 40(sp) +; CHECK-NEXT: fsw fs9, 36(sp) +; CHECK-NEXT: fsw fs8, 32(sp) +; CHECK-NEXT: fsw fs7, 28(sp) +; CHECK-NEXT: fsw fs6, 24(sp) +; CHECK-NEXT: fsw fs5, 20(sp) +; CHECK-NEXT: fsw fs4, 16(sp) +; CHECK-NEXT: fsw fs3, 12(sp) +; CHECK-NEXT: fsw fs2, 8(sp) +; CHECK-NEXT: fsw fs1, 4(sp) +; CHECK-NEXT: fsw fs0, 0(sp) +; CHECK-NEXT: call callee +; CHECK-NEXT: lw ra, 60(sp) +; CHECK-NEXT: addi sp, sp, 64 +; CHECK-NEXT: ret + %C = call fastcc float @callee(<32 x float> %A) + ret float %C +} Index: test/CodeGen/RISCV/fastcc-int.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/fastcc-int.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 
-verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV32 %s +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64 %s + +define fastcc i32 @callee(<16 x i32> %A) nounwind { +; RV32-LABEL: callee: +; RV32: # %bb.0: +; RV32-NEXT: ret +; +; RV64-LABEL: callee: +; RV64: # %bb.0: +; RV64-NEXT: ret + %B = extractelement <16 x i32> %A, i32 0 + ret i32 %B +} + +; With the fastcc, arguments will be passed by a0-a7 and t2-t6. +; The rest will be pushed on the stack. +define i32 @caller(<16 x i32> %A) nounwind { +; RV32-LABEL: caller: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw ra, 28(sp) +; RV32-NEXT: sw s0, 24(sp) +; RV32-NEXT: lw t0, 0(a0) +; RV32-NEXT: lw a1, 4(a0) +; RV32-NEXT: lw a2, 8(a0) +; RV32-NEXT: lw a3, 12(a0) +; RV32-NEXT: lw a4, 16(a0) +; RV32-NEXT: lw a5, 20(a0) +; RV32-NEXT: lw a6, 24(a0) +; RV32-NEXT: lw a7, 28(a0) +; RV32-NEXT: lw t2, 32(a0) +; RV32-NEXT: lw t3, 36(a0) +; RV32-NEXT: lw t4, 40(a0) +; RV32-NEXT: lw t5, 44(a0) +; RV32-NEXT: lw t6, 48(a0) +; RV32-NEXT: lw t1, 52(a0) +; RV32-NEXT: lw s0, 56(a0) +; RV32-NEXT: lw a0, 60(a0) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw s0, 4(sp) +; RV32-NEXT: sw t1, 0(sp) +; RV32-NEXT: mv a0, t0 +; RV32-NEXT: call callee +; RV32-NEXT: lw s0, 24(sp) +; RV32-NEXT: lw ra, 28(sp) +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: caller: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -48 +; RV64-NEXT: sd ra, 40(sp) +; RV64-NEXT: sd s0, 32(sp) +; RV64-NEXT: ld t0, 0(a0) +; RV64-NEXT: ld a1, 8(a0) +; RV64-NEXT: ld a2, 16(a0) +; RV64-NEXT: ld a3, 24(a0) +; RV64-NEXT: ld a4, 32(a0) +; RV64-NEXT: ld a5, 40(a0) +; RV64-NEXT: ld a6, 48(a0) +; RV64-NEXT: ld a7, 56(a0) +; RV64-NEXT: ld t2, 64(a0) +; RV64-NEXT: ld t3, 72(a0) +; RV64-NEXT: ld t4, 80(a0) +; RV64-NEXT: ld t5, 88(a0) +; RV64-NEXT: ld t6, 96(a0) +; RV64-NEXT: ld t1, 104(a0) +; RV64-NEXT: ld s0, 112(a0) +; RV64-NEXT: ld a0, 120(a0) +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd s0, 
8(sp) +; RV64-NEXT: sd t1, 0(sp) +; RV64-NEXT: mv a0, t0 +; RV64-NEXT: call callee +; RV64-NEXT: ld s0, 32(sp) +; RV64-NEXT: ld ra, 40(sp) +; RV64-NEXT: addi sp, sp, 48 +; RV64-NEXT: ret + %C = call fastcc i32 @callee(<16 x i32> %A) + ret i32 %C +}