Index: include/llvm/IR/CallingConv.h
===================================================================
--- include/llvm/IR/CallingConv.h
+++ include/llvm/IR/CallingConv.h
@@ -222,6 +222,9 @@
     // Calling convention between AArch64 Advanced SIMD functions
     AArch64_VectorCall = 97,
 
+    /// Calling convention between AArch64 SVE functions
+    AArch64_SVE_VectorCall = 98,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
Index: lib/Target/AArch64/AArch64CallingConvention.td
===================================================================
--- lib/Target/AArch64/AArch64CallingConvention.td
+++ lib/Target/AArch64/AArch64CallingConvention.td
@@ -70,6 +70,18 @@
 
   CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
 
+  CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+            nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+           CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+  CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+            nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+           CCPassIndirect<i64>>,
+
+  CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+           CCAssignToReg<[P0, P1, P2, P3]>>,
+  CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+           CCPassIndirect<i64>>,
+
   // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
   // up to eight each of GPR and FPR.
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
@@ -135,7 +147,14 @@
       CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                               [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
   CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
-      CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
+      CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
+
+  CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+            nxv1f32, nxv2f32, nxv4f32, nxv1f64, nxv2f64],
+           CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
+
+  CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+           CCAssignToReg<[P0, P1, P2, P3]>>
 ]>;
 
 // Vararg functions on windows pass floats in integer registers
@@ -325,6 +344,13 @@
                                                 X23, X24, X25, X26, X27, X28,
                                                 (sequence "Q%u", 8, 23))>;
 
+// Functions taking SVE arguments or returning an SVE type
+// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
+def CSR_AArch64_SVE_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+                                                 X23, X24, X25, X26, X27, X28,
+                                                 (sequence "Z%u", 8, 23),
+                                                 (sequence "P%u", 4, 15))>;
+
 // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
 // 'this' and the pointer return value are both passed in X0 in these cases,
 // this can be partially modelled by treating X0 as a callee-saved register;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -161,6 +161,29 @@
     addQRTypeForNEON(MVT::v8f16);
   }
 
+  if (Subtarget->hasSVE()) {
+    // Add legal sve predicate types
+    addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
+    addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
+    addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
+    addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
+
+    // Add legal sve data types
+    addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
+
+    addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv1f32, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv1f64, &AArch64::ZPRRegClass);
+    addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
+  }
+
   // Compute derived properties from the register classes
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
@@ -3139,6 +3162,11 @@
         RC = &AArch64::FPR64RegClass;
       else if (RegVT == MVT::f128 || RegVT.is128BitVector())
         RC = &AArch64::FPR128RegClass;
+      else if (RegVT.isScalableVector() &&
+               RegVT.getVectorElementType() == MVT::i1)
+        RC = &AArch64::PPRRegClass;
+      else if (RegVT.isScalableVector())
+        RC = &AArch64::ZPRRegClass;
       else
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
@@ -3154,6 +3182,10 @@
         llvm_unreachable("Unknown loc info!");
       case CCValAssign::Full:
         break;
+      case CCValAssign::Indirect:
+        assert(VA.getValVT().isScalableVector() &&
+               "Only scalable vectors can be passed indirectly");
+        llvm_unreachable("Spilling of SVE vectors not yet implemented");
       case CCValAssign::BCvt:
         ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
         break;
@@ -3194,6 +3226,10 @@
       case CCValAssign::BCvt:
         MemVT = VA.getLocVT();
         break;
+      case CCValAssign::Indirect:
+        assert(VA.getValVT().isScalableVector() &&
+               "Only scalable vectors can be passed indirectly");
+        llvm_unreachable("Spilling of SVE vectors not yet implemented");
       case CCValAssign::SExt:
         ExtType = ISD::SEXTLOAD;
         break;
@@ -3779,6 +3815,10 @@
     case CCValAssign::FPExt:
       Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
       break;
+    case CCValAssign::Indirect:
+      assert(VA.getValVT().isScalableVector() &&
+             "Only scalable vectors can be passed indirectly");
+      llvm_unreachable("Spilling of SVE vectors not yet implemented");
     }
 
     if (VA.isRegLoc()) {
@@ -3925,6 +3965,20 @@
     Ops.push_back(DAG.getRegister(RegToPass.first,
                                   RegToPass.second.getValueType()));
 
+  // Check callee args/returns for SVE registers and set calling convention
+  // accordingly.
+  if (CallConv == CallingConv::C) {
+    bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
+      return Out.VT.isScalableVector();
+    });
+    bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
+      return In.VT.isScalableVector();
+    });
+
+    if (CalleeInSVE || CalleeOutSVE)
+      CallConv = CallingConv::AArch64_SVE_VectorCall;
+  }
+
   // Add a register mask operand representing the call-preserved registers.
   const uint32_t *Mask;
   const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -120,6 +120,8 @@
                : CSR_AArch64_CXX_TLS_Darwin_RegMask;
   if (CC == CallingConv::AArch64_VectorCall)
     return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
+  if (CC == CallingConv::AArch64_SVE_VectorCall)
+    return CSR_AArch64_SVE_AAPCS_RegMask;
   if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
           ->supportSwiftError() &&
       MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
Index: test/CodeGen/AArch64/sve-calling-convention.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/sve-calling-convention.ll
@@ -0,0 +1,121 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel < %s | FileCheck %s
+
+; CHECK-LABEL: name: nosve_signature
+define i32 @nosve_signature() nounwind {
+  ret i32 42
+}
+
+; CHECK-LABEL: name: sve_signature_ret_vec
+define <vscale x 4 x i32> @sve_signature_ret_vec() nounwind {
+  ret <vscale x 4 x i32> undef
+}
+
+; CHECK-LABEL: name: sve_signature_ret_pred
+define <vscale x 4 x i1> @sve_signature_ret_pred() nounwind {
+  ret <vscale x 4 x i1> undef
+}
+
+; CHECK-LABEL: name: sve_signature_arg_vec
+define void @sve_signature_arg_vec(<vscale x 4 x i32> %arg) nounwind {
+  ret void
+}
+
+; CHECK-LABEL: name: sve_signature_arg_pred
+define void @sve_signature_arg_pred(<vscale x 4 x i1> %arg) nounwind {
+  ret void
+}
+
+; CHECK-LABEL: name: caller_nosve_signature
+; CHECK: BL @nosve_signature, csr_aarch64_aapcs
+define i32 @caller_nosve_signature() nounwind {
+  %res = call i32 @nosve_signature()
+  ret i32 %res
+}
+
+; CHECK-LABEL: name: sve_signature_ret_vec_caller
+; CHECK: BL @sve_signature_ret_vec, csr_aarch64_sve_aapcs
+define <vscale x 4 x i32> @sve_signature_ret_vec_caller() nounwind {
+  %res = call <vscale x 4 x i32> @sve_signature_ret_vec()
+  ret <vscale x 4 x i32> %res
+}
+
+; CHECK-LABEL: name: sve_signature_ret_pred_caller
+; CHECK: BL @sve_signature_ret_pred, csr_aarch64_sve_aapcs
+define <vscale x 4 x i1> @sve_signature_ret_pred_caller() nounwind {
+  %res = call <vscale x 4 x i1> @sve_signature_ret_pred()
+  ret <vscale x 4 x i1> %res
+}
+
+; CHECK-LABEL: name: sve_signature_arg_vec_caller
+; CHECK: BL @sve_signature_arg_vec, csr_aarch64_sve_aapcs
+define void @sve_signature_arg_vec_caller(<vscale x 4 x i32> %arg) nounwind {
+  call void @sve_signature_arg_vec(<vscale x 4 x i32> %arg)
+  ret void
+}
+
+; CHECK-LABEL: name: sve_signature_arg_pred_caller
+; CHECK: BL @sve_signature_arg_pred, csr_aarch64_sve_aapcs
+define void @sve_signature_arg_pred_caller(<vscale x 4 x i1> %arg) nounwind {
+  call void @sve_signature_arg_pred(<vscale x 4 x i1> %arg)
+  ret void
+}
+
+; CHECK-LABEL: name: sve_signature_many_arg_vec
+; CHECK: [[RES:%[0-9]+]]:zpr = COPY $z7
+; CHECK: $z0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $z0
+define <vscale x 4 x i32> @sve_signature_many_arg_vec(<vscale x 4 x i32> %arg1, <vscale x 4 x i32> %arg2, <vscale x 4 x i32> %arg3, <vscale x 4 x i32> %arg4, <vscale x 4 x i32> %arg5, <vscale x 4 x i32> %arg6, <vscale x 4 x i32> %arg7, <vscale x 4 x i32> %arg8) nounwind {
+  ret <vscale x 4 x i32> %arg8
+}
+
+; CHECK-LABEL: name: sve_signature_many_arg_pred
+; CHECK: [[RES:%[0-9]+]]:ppr = COPY $p3
+; CHECK: $p0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $p0
+define <vscale x 4 x i1> @sve_signature_many_arg_pred(<vscale x 4 x i1> %arg1, <vscale x 4 x i1> %arg2, <vscale x 4 x i1> %arg3, <vscale x 4 x i1> %arg4) nounwind {
+  ret <vscale x 4 x i1> %arg4
+}
+
+; CHECK-LABEL: name: sve_signature_vec
+; CHECK: [[RES:%[0-9]+]]:zpr = COPY $z1
+; CHECK: $z0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $z0
+define <vscale x 4 x i32> @sve_signature_vec(<vscale x 4 x i32> %arg1, <vscale x 4 x i32> %arg2) nounwind {
+  ret <vscale x 4 x i32> %arg2
+}
+
+; CHECK-LABEL: name: sve_signature_pred
+; CHECK: [[RES:%[0-9]+]]:ppr = COPY $p1
+; CHECK: $p0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $p0
+define <vscale x 4 x i1> @sve_signature_pred(<vscale x 4 x i1> %arg1, <vscale x 4 x i1> %arg2) nounwind {
+  ret <vscale x 4 x i1> %arg2
+}
+
+; CHECK-LABEL: name: sve_signature_vec_caller
+; CHECK-DAG: [[ARG2:%[0-9]+]]:zpr = COPY $z1
+; CHECK-DAG: [[ARG1:%[0-9]+]]:zpr = COPY $z0
+; CHECK-DAG: $z0 = COPY [[ARG2]]
+; CHECK-DAG: $z1 = COPY [[ARG1]]
+; CHECK-NEXT: BL @sve_signature_vec, csr_aarch64_sve_aapcs
+; CHECK: [[RES:%[0-9]+]]:zpr = COPY $z0
+; CHECK: $z0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $z0
+define <vscale x 4 x i32> @sve_signature_vec_caller(<vscale x 4 x i32> %arg1, <vscale x 4 x i32> %arg2) nounwind {
+  %res = call <vscale x 4 x i32> @sve_signature_vec(<vscale x 4 x i32> %arg2, <vscale x 4 x i32> %arg1)
+  ret <vscale x 4 x i32> %res
+}
+
+; CHECK-LABEL: name: sve_signature_pred_caller
+; CHECK-DAG: [[ARG2:%[0-9]+]]:ppr = COPY $p1
+; CHECK-DAG: [[ARG1:%[0-9]+]]:ppr = COPY $p0
+; CHECK-DAG: $p0 = COPY [[ARG2]]
+; CHECK-DAG: $p1 = COPY [[ARG1]]
+; CHECK-NEXT: BL @sve_signature_pred, csr_aarch64_sve_aapcs
+; CHECK: [[RES:%[0-9]+]]:ppr = COPY $p0
+; CHECK: $p0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $p0
+define <vscale x 4 x i1> @sve_signature_pred_caller(<vscale x 4 x i1> %arg1, <vscale x 4 x i1> %arg2) nounwind {
+  %res = call <vscale x 4 x i1> @sve_signature_pred(<vscale x 4 x i1> %arg2, <vscale x 4 x i1> %arg1)
+  ret <vscale x 4 x i1> %res
+}