Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3138,9 +3138,6 @@
   switch (CC) {
   default:
     report_fatal_error("Unsupported calling convention.");
-  case CallingConv::AArch64_SVE_VectorCall:
-    // Calling SVE functions is currently not yet supported.
-    report_fatal_error("Unsupported calling convention.");
   case CallingConv::WebKit_JS:
     return CC_AArch64_WebKit_JS;
   case CallingConv::GHC:
@@ -3163,6 +3160,7 @@
   case CallingConv::CFGuard_Check:
     return CC_AArch64_Win64_CFGuard_Check;
   case CallingConv::AArch64_VectorCall:
+  case CallingConv::AArch64_SVE_VectorCall:
     return CC_AArch64_AAPCS;
   }
 }
@@ -3281,7 +3279,7 @@
       case CCValAssign::Indirect:
         assert(VA.getValVT().isScalableVector() &&
                "Only scalable vectors can be passed indirectly");
-        llvm_unreachable("Spilling of SVE vectors not yet implemented");
+        break;
       case CCValAssign::BCvt:
         ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
         break;
@@ -3299,6 +3297,8 @@
       assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
       unsigned ArgOffset = VA.getLocMemOffset();
       unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
+      if (VA.getLocInfo() == CCValAssign::Indirect)
+        ArgSize = VA.getLocVT().getSizeInBits() / 8;
 
       uint32_t BEAlign = 0;
       if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
@@ -3324,7 +3324,8 @@
       case CCValAssign::Indirect:
         assert(VA.getValVT().isScalableVector() &&
                "Only scalable vectors can be passed indirectly");
-        llvm_unreachable("Spilling of SVE vectors not yet implemented");
+        MemVT = VA.getLocVT();
+        break;
       case CCValAssign::SExt:
         ExtType = ISD::SEXTLOAD;
         break;
@@ -3342,6 +3343,15 @@
             MemVT);
     }
+
+    if (VA.getLocInfo() == CCValAssign::Indirect) {
+      assert(VA.getValVT().isScalableVector() &&
+             "Only scalable vectors can be passed indirectly");
+      // If the value is passed via a pointer, do a load here.
+      ArgValue =
+          DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo());
+    }
+
     if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
       ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
                              ArgValue, DAG.getValueType(MVT::i32));
 
@@ -3702,6 +3712,18 @@
 
   const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
 
+  // If any of the arguments is passed indirectly, it must be SVE, so the
+  // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
+  // allocate space on the stack. That is why we explicitly reject such calls
+  // here: they cannot become tail calls.
+  if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
+        assert((A.getLocInfo() != CCValAssign::Indirect ||
+                A.getValVT().isScalableVector()) &&
+               "Expected value to be scalable");
+        return A.getLocInfo() == CCValAssign::Indirect;
+      }))
+    return false;
+
   // If the stack arguments for this call do not fit into our own save area then
   // the call cannot be made tail.
   if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
@@ -3943,7 +3965,15 @@
     case CCValAssign::Indirect:
       assert(VA.getValVT().isScalableVector() &&
             "Only scalable vectors can be passed indirectly");
-      llvm_unreachable("Spilling of SVE vectors not yet implemented");
+      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+      MFI.setStackID(FI, TargetStackID::SVEVector);
+      Chain = DAG.getStore(
+          Chain, DL, Arg, SpillSlot,
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
+      Arg = SpillSlot;
+      break;
     }
 
     if (VA.isRegLoc()) {
Index: llvm/test/CodeGen/AArch64/sve-callbyref-notailcall.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-callbyref-notailcall.ll
@@ -0,0 +1,29 @@
+; Because some arguments are passed by reference (through the stack),
+; the compiler should not do tail-call optimization.
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
+
+; CHECK-LABEL: caller:
+; CHECK:       addvl sp, sp, #-[[STACKSIZE:[0-9]+]]
+; CHECK-NOT:   addvl sp
+; CHECK:       bl callee
+; CHECK:       addvl sp, sp, #[[STACKSIZE]]
+; CHECK:       ret
+define <vscale x 16 x i8> @caller(<vscale x 16 x i8> %v) {
+  %1 = tail call <vscale x 16 x i8> @callee(<vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v, <vscale x 16 x i8> %v)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 16 x i8> @callee(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+
+; CHECK-LABEL: caller_pred:
+; CHECK:       addvl sp, sp, #-[[STACKSIZE:[0-9]+]]
+; CHECK-NOT:   addvl sp
+; CHECK:       bl callee_pred
+; CHECK:       addvl sp, sp, #[[STACKSIZE]]
+; CHECK:       ret
+define <vscale x 16 x i1> @caller_pred(<vscale x 16 x i1> %v) {
+  %1 = tail call <vscale x 16 x i1> @callee_pred(<vscale x 16 x i1> %v, <vscale x 16 x i1> %v, <vscale x 16 x i1> %v, <vscale x 16 x i1> %v, <vscale x 16 x i1> %v)
+  ret <vscale x 16 x i1> %1
+}
+
+declare <vscale x 16 x i1> @callee_pred(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
Index: llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-calling-convention-byref.ll
@@ -0,0 +1,118 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -stop-after=finalize-isel < %s | FileCheck %s
+
+; Test that z8 and z9, passed in by reference, are correctly loaded from x0 and x1.
+; i.e. z0 =  %z0
+;         :
+;      z7 =  %z7
+;      x0 = &%z8
+;      x1 = &%z9
+define aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_sve_arg(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5, <vscale x 4 x i32> %z6, <vscale x 4 x i32> %z7, <vscale x 4 x i32> %z8, <vscale x 4 x i32> %z9) {
+; CHECK: name: callee_with_many_sve_arg
+; CHECK-DAG: [[BASE:%[0-9]+]]:gpr64common = COPY $x1
+; CHECK-DAG: [[PTRUE:%[0-9]+]]:ppr_3b = PTRUE_S 31
+; CHECK-DAG: [[RES:%[0-9]+]]:zpr = LD1W_IMM killed [[PTRUE]], [[BASE]]
+; CHECK-DAG: $z0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $z0
+  ret <vscale x 4 x i32> %z9
+}
+
+; Test that z8 and z9 are passed by reference.
+define aarch64_sve_vector_pcs <vscale x 4 x i32> @caller_with_many_sve_arg(<vscale x 4 x i32> %z) {
+; CHECK: name: caller_with_many_sve_arg
+; CHECK: stack:
+; CHECK:      - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16,
+; CHECK-NEXT:     stack-id: sve-vec
+; CHECK:      - { id: 1, name: '', type: default, offset: 0, size: 16, alignment: 16,
+; CHECK-NEXT:     stack-id: sve-vec
+; CHECK-DAG: [[PTRUE:%[0-9]+]]:ppr_3b = PTRUE_S 31
+; CHECK-DAG: ST1W_IMM %{{[0-9]+}}, [[PTRUE]], %stack.1, 0
+; CHECK-DAG: ST1W_IMM %{{[0-9]+}}, [[PTRUE]], %stack.0, 0
+; CHECK-DAG: [[BASE2:%[0-9]+]]:gpr64sp = ADDXri %stack.1, 0
+; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0
+; CHECK-DAG: $x0 = COPY [[BASE1]]
+; CHECK-DAG: $x1 = COPY [[BASE2]]
+; CHECK-NEXT: BL @callee_with_many_sve_arg
+; CHECK: RET_ReallyLR implicit $z0
+  %ret = call aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_sve_arg(<vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z)
+  ret <vscale x 4 x i32> %ret
+}
+
+; Test that p4 and p5, passed in by reference, are correctly loaded from registers x0 and x1.
+; i.e. p0 =  %p0
+;         :
+;      p3 =  %p3
+;      x0 = &%p4
+;      x1 = &%p5
+define aarch64_sve_vector_pcs <vscale x 16 x i1> @callee_with_many_svepred_arg(<vscale x 16 x i1> %p0, <vscale x 16 x i1> %p1, <vscale x 16 x i1> %p2, <vscale x 16 x i1> %p3, <vscale x 16 x i1> %p4, <vscale x 16 x i1> %p5) {
+; CHECK: name: callee_with_many_svepred_arg
+; CHECK-DAG: [[BASE:%[0-9]+]]:gpr64common = COPY $x1
+; CHECK-DAG: [[RES:%[0-9]+]]:ppr = LDR_PXI [[BASE]], 0
+; CHECK-DAG: $p0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $p0
+  ret <vscale x 16 x i1> %p5
+}
+
+; Test that p4 and p5 are passed by reference.
+define aarch64_sve_vector_pcs <vscale x 16 x i1> @caller_with_many_svepred_arg(<vscale x 16 x i1> %p) {
+; CHECK: name: caller_with_many_svepred_arg
+; CHECK: stack:
+; CHECK:      - { id: 0, name: '', type: default, offset: 0, size: 1, alignment: 4,
+; CHECK-NEXT:     stack-id: sve-vec
+; CHECK:      - { id: 1, name: '', type: default, offset: 0, size: 1, alignment: 4,
+; CHECK-NEXT:     stack-id: sve-vec
+; CHECK-DAG: STR_PXI %{{[0-9]+}}, %stack.0, 0
+; CHECK-DAG: STR_PXI %{{[0-9]+}}, %stack.1, 0
+; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0
+; CHECK-DAG: [[BASE2:%[0-9]+]]:gpr64sp = ADDXri %stack.1, 0
+; CHECK-DAG: $x0 = COPY [[BASE1]]
+; CHECK-DAG: $x1 = COPY [[BASE2]]
+; CHECK-NEXT: BL @callee_with_many_svepred_arg
+; CHECK: RET_ReallyLR implicit $p0
+  %ret = call aarch64_sve_vector_pcs <vscale x 16 x i1> @callee_with_many_svepred_arg(<vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p, <vscale x 16 x i1> %p)
+  ret <vscale x 16 x i1> %ret
+}
+
+; Test that z8 and z9, passed by reference, are loaded from an address that is itself passed on the stack.
+; i.e. x0 =  %x0
+;         :
+;      x7 =  %x7
+;      z0 =  %z0
+;         :
+;      z7 =  %z7
+;    [sp] = &%z8
+;  [sp+8] = &%z9
+;
+define aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_gpr_sve_arg(i64 %x0, i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5, <vscale x 4 x i32> %z6, <vscale x 4 x i32> %z7, <vscale x 2 x i64> %z8, <vscale x 4 x i32> %z9) {
+; CHECK: name: callee_with_many_gpr_sve_arg
+; CHECK: fixedStack:
+; CHECK: - { id: 0, type: default, offset: 8, size: 8, alignment: 8, stack-id: default,
+; CHECK-DAG: [[BASE:%[0-9]+]]:gpr64common = LDRXui %fixed-stack.0, 0
+; CHECK-DAG: [[PTRUE:%[0-9]+]]:ppr_3b = PTRUE_S 31
+; CHECK-DAG: [[RES:%[0-9]+]]:zpr = LD1W_IMM killed [[PTRUE]], killed [[BASE]]
+; CHECK-DAG: $z0 = COPY [[RES]]
+; CHECK: RET_ReallyLR implicit $z0
+  ret <vscale x 4 x i32> %z9
+}
+
+; Test that z8 and z9 are passed by reference, where the references themselves are passed on the stack.
+define aarch64_sve_vector_pcs <vscale x 4 x i32> @caller_with_many_gpr_sve_arg(i64 %x, <vscale x 4 x i32> %z, <vscale x 2 x i64> %z2) {
+; CHECK: name: caller_with_many_gpr_sve_arg
+; CHECK: stack:
+; CHECK:      - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16,
+; CHECK-NEXT:     stack-id: sve-vec
+; CHECK:      - { id: 1, name: '', type: default, offset: 0, size: 16, alignment: 16,
+; CHECK-NEXT:     stack-id: sve-vec
+; CHECK-DAG: [[PTRUE_S:%[0-9]+]]:ppr_3b = PTRUE_S 31
+; CHECK-DAG: [[PTRUE_D:%[0-9]+]]:ppr_3b = PTRUE_D 31
+; CHECK-DAG: ST1D_IMM %{{[0-9]+}}, killed [[PTRUE_D]], %stack.0, 0
+; CHECK-DAG: ST1W_IMM %{{[0-9]+}}, killed [[PTRUE_S]], %stack.1, 0
+; CHECK-DAG: [[BASE1:%[0-9]+]]:gpr64common = ADDXri %stack.0, 0
+; CHECK-DAG: [[BASE2:%[0-9]+]]:gpr64common = ADDXri %stack.1, 0
+; CHECK-DAG: [[SP:%[0-9]+]]:gpr64sp = COPY $sp
+; CHECK-DAG: STRXui killed [[BASE1]], [[SP]], 0
+; CHECK-DAG: STRXui killed [[BASE2]], [[SP]], 1
+; CHECK: BL @callee_with_many_gpr_sve_arg
+; CHECK: RET_ReallyLR implicit $z0
+  %ret = call aarch64_sve_vector_pcs <vscale x 4 x i32> @callee_with_many_gpr_sve_arg(i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, i64 %x, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 4 x i32> %z, <vscale x 2 x i64> %z2, <vscale x 4 x i32> %z)
+  ret <vscale x 4 x i32> %ret
+}