diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -6795,10 +6795,17 @@
     return false;
   }
 
-  // Fixed-length vectors are located in the corresponding scalable-vector
-  // container types.
-  if (ValVT.isFixedLengthVector())
-    LocVT = TLI.getContainerForFixedLengthVector(LocVT);
+  if (ValVT.isFixedLengthVector()) {
+    // Pass vectors with the same size as XLen in GPRs for RVP.
+    if (TLI.getSubtarget().hasStdExtP() && (XLen == ValVT.getSizeInBits())) {
+      LocVT = XLenVT;
+      LocInfo = CCValAssign::BCvt;
+    } else {
+      // Fixed-length vectors are located in the corresponding scalable-vector
+      // container types.
+      LocVT = TLI.getContainerForFixedLengthVector(LocVT);
+    }
+  }
 
   // Split arguments might be passed indirectly, so keep track of the pending
   // values. Split vectors are passed via a mix of registers and indirectly, so
@@ -6836,7 +6843,7 @@
     Reg = State.AllocateReg(ArgFPR32s);
   else if (ValVT == MVT::f64 && !UseGPRForF64)
     Reg = State.AllocateReg(ArgFPR64s);
-  else if (ValVT.isVector()) {
+  else if (ValVT.isVector() && LocVT.isVector()) {
    const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
    if (RC == &RISCV::VRRegClass) {
      // Assign the first mask argument to V0.
diff --git a/llvm/test/CodeGen/RISCV/rvp/calling-conv-rv32.ll b/llvm/test/CodeGen/RISCV/rvp/calling-conv-rv32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/calling-conv-rv32.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+f -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+define <4 x i8> @ret_v4i8(<4 x i8>* %p) nounwind {
+; CHECK-LABEL: ret_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    ret
+  %v = load <4 x i8>, <4 x i8>* %p
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @param_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
+; CHECK-LABEL: param_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    ret
+  %r = and <4 x i8> %a, %b
+  ret <4 x i8> %r
+}
+
+define <2 x i16> @ret_v2i16(<2 x i16>* %p) nounwind {
+; CHECK-LABEL: ret_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a0, 0(a0)
+; CHECK-NEXT:    ret
+  %v = load <2 x i16>, <2 x i16>* %p
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @param_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
+; CHECK-LABEL: param_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    ret
+  %r = and <2 x i16> %a, %b
+  ret <2 x i16> %r
+}
+
+declare <4 x i8> @callee_v4i8(<4 x i8>, <4 x i8>)
+
+define <4 x i8> @call_v4i8(<4 x i8> %a, <4 x i8> %b) nounwind {
+; CHECK-LABEL: call_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    call callee_v4i8@plt
+; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %r = call <4 x i8> @callee_v4i8(<4 x i8> %b, <4 x i8> %a)
+  ret <4 x i8> %r
+}
+
+declare <2 x i16> @callee_v2i16(<2 x i16>, <2 x i16>)
+
+define <2 x i16> @call_v2i16(<2 x i16> %a, <2 x i16> %b) nounwind {
+; CHECK-LABEL: call_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    call callee_v2i16@plt
+; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %r = call <2 x i16> @callee_v2i16(<2 x i16> %b, <2 x i16> %a)
+  ret <2 x i16> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp/calling-conv-rv64.ll b/llvm/test/CodeGen/RISCV/rvp/calling-conv-rv64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp/calling-conv-rv64.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+f -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s
+
+define <8 x i8> @ret_v8i8(<8 x i8>* %p) nounwind {
+; CHECK-LABEL: ret_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a0, 0(a0)
+; CHECK-NEXT:    ret
+  %v = load <8 x i8>, <8 x i8>* %p
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @param_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
+; CHECK-LABEL: param_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    ret
+  %r = and <8 x i8> %a, %b
+  ret <8 x i8> %r
+}
+
+define <4 x i16> @ret_v4i16(<4 x i16>* %p) nounwind {
+; CHECK-LABEL: ret_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a0, 0(a0)
+; CHECK-NEXT:    ret
+  %v = load <4 x i16>, <4 x i16>* %p
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @param_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
+; CHECK-LABEL: param_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    ret
+  %r = and <4 x i16> %a, %b
+  ret <4 x i16> %r
+}
+
+define <2 x i32> @ret_v2i32(<2 x i32>* %p) nounwind {
+; CHECK-LABEL: ret_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a0, 0(a0)
+; CHECK-NEXT:    ret
+  %v = load <2 x i32>, <2 x i32>* %p
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @param_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
+; CHECK-LABEL: param_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and a0, a0, a1
+; CHECK-NEXT:    ret
+  %r = and <2 x i32> %a, %b
+  ret <2 x i32> %r
+}
+
+declare <8 x i8> @callee_v8i8(<8 x i8>, <8 x i8>)
+
+define <8 x i8> @call_v8i8(<8 x i8> %a, <8 x i8> %b) nounwind {
+; CHECK-LABEL: call_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    call callee_v8i8@plt
+; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %r = call <8 x i8> @callee_v8i8(<8 x i8> %b, <8 x i8> %a)
+  ret <8 x i8> %r
+}
+
+declare <4 x i16> @callee_v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @call_v4i16(<4 x i16> %a, <4 x i16> %b) nounwind {
+; CHECK-LABEL: call_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    call callee_v4i16@plt
+; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %r = call <4 x i16> @callee_v4i16(<4 x i16> %b, <4 x i16> %a)
+  ret <4 x i16> %r
+}
+
+declare <2 x i32> @callee_v2i32(<2 x i32>, <2 x i32>)
+
+define <2 x i32> @call_v2i32(<2 x i32> %a, <2 x i32> %b) nounwind {
+; CHECK-LABEL: call_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    call callee_v2i32@plt
+; CHECK-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    ret
+  %r = call <2 x i32> @callee_v2i32(<2 x i32> %b, <2 x i32> %a)
+  ret <2 x i32> %r
+}
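
For reference, the ABI change this patch makes: with the P extension enabled, a fixed-length vector whose total size equals XLen is assigned to a single GPR with CCValAssign::BCvt, rather than being placed in a scalable-vector container register. A minimal sketch of what that implies on rv32 with +experimental-p (a hypothetical function, not part of the tests above): the <4 x i8> argument already arrives in a0 as a 32-bit value, so the bitcast to i32 should fold away and the function should lower to a bare ret.

; Hypothetical illustration, not part of this patch.
define i32 @v4i8_bits(<4 x i8> %v) nounwind {
  ; With the RVP calling convention the vector argument occupies a0,
  ; so this bitcast is expected to be a no-op after lowering.
  %bc = bitcast <4 x i8> %v to i32
  ret i32 %bc
}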