diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5585,6 +5585,7 @@
   }
   SmallVector<SDValue, 16> ArgValues;
   unsigned ExtraArgLocs = 0;
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
 
@@ -5627,11 +5628,13 @@
       else if (RegVT == MVT::f128 || RegVT.is128BitVector())
         RC = &AArch64::FPR128RegClass;
       else if (RegVT.isScalableVector() &&
-               RegVT.getVectorElementType() == MVT::i1)
+               RegVT.getVectorElementType() == MVT::i1) {
+        FuncInfo->setSVE(true);
         RC = &AArch64::PPRRegClass;
-      else if (RegVT.isScalableVector())
+      } else if (RegVT.isScalableVector()) {
+        FuncInfo->setSVE(true);
         RC = &AArch64::ZPRRegClass;
-      else
+      } else
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
       // Transform the arguments in physical registers into virtual ones.
@@ -5768,7 +5771,6 @@
   assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
 
   // varargs
-  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   if (isVarArg) {
     if (!Subtarget->isTargetDarwin() || IsWin64) {
       // The AAPCS variadic function ABI is identical to the non-variadic
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -175,6 +175,8 @@
   /// The stack slot where the Swift asynchronous context is stored.
   int SwiftAsyncContextFrameIdx = std::numeric_limits<int>::max();
 
+  bool IsSVE = false;
+
   /// True if the function needs unwind information.
   mutable Optional<bool> NeedsDwarfUnwindInfo;
 
@@ -184,6 +186,9 @@
 public:
   explicit AArch64FunctionInfo(MachineFunction &MF);
 
+  bool isSVE() const { return IsSVE; }
+  void setSVE(bool s) { IsSVE = s; }
+
   void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
 
   unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -67,11 +67,8 @@
 }
 
 bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
-  const Function &F = MF->getFunction();
-  return isa<ScalableVectorType>(F.getReturnType()) ||
-         any_of(F.args(), [](const Argument &Arg) {
-           return isa<ScalableVectorType>(Arg.getType());
-         });
+  return (MF->getInfo<AArch64FunctionInfo>()->isSVE() ||
+          isa<ScalableVectorType>(MF->getFunction().getReturnType()));
 }
 
 const MCPhysReg *
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -188,6 +188,193 @@
   ret double %x0
 }
 
+; Use AAVPCS, SVE register in z0-z7 used
+
+define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, i32 * %ptr) nounwind {
+; CHECK-LABEL: aavpcs1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x8, x9, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x7]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast i32 * %ptr to <vscale x 4 x i32>*
+  store volatile <vscale x 4 x i32> %s7, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s8, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s9, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s11, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s12, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s13, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s14, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s15, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s16, <vscale x 4 x i32>* %ptr1.bc
+  ret void
+}
+
+; Use AAVPCS, SVE register in z0-z7 used
+
+define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, float * %ptr) nounwind {
+; CHECK-LABEL: aavpcs2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x8, x9, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x6]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x5]
+; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x4]
+; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x3]
+; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x0]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast float * %ptr to <vscale x 4 x float>*
+  store volatile <vscale x 4 x float> %s7, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s8, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s9, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s11, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s12, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s13, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s14, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s15, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s16, <vscale x 4 x float>* %ptr1.bc
+  ret void
+}
+
+; Use AAVPCS, no SVE register in z0-z7 used (floats occupy z0-z7) but predicate arg is used
+
+define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 4 x i1> %p0, float * %ptr) nounwind {
+; CHECK-LABEL: aavpcs3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x6]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x5]
+; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x4]
+; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x3]
+; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
+; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x0]
+; CHECK-NEXT:    ldr x8, [sp, #16]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast float * %ptr to <vscale x 4 x float>*
+  store volatile <vscale x 4 x float> %s8, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s9, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s10, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s11, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s12, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s13, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s14, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s15, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s16, <vscale x 4 x float>* %ptr1.bc
+  ret void
+}
+
+; Use AAVPCS, SVE register in z0-z7 used (i32s don't occupy z0-z7)
+
+define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, i32 * %ptr) nounwind {
+; CHECK-LABEL: aavpcs4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr x9, [sp, #16]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast i32 * %ptr to <vscale x 4 x i32>*
+  store volatile <vscale x 4 x i32> %s8, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s9, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s10, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s11, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s12, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s13, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s14, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s15, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s16, <vscale x 4 x i32>* %ptr1.bc
+  ret void
+}
+
+; Use AAPCS, no SVE register in z0-z7 used (floats occupy z0-z7)
+
+define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, float * %ptr) nounwind {
+; CHECK-LABEL: aapcs1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x6]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x5]
+; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x4]
+; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x3]
+; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
+; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z16.s }, p0/z, [x0]
+; CHECK-NEXT:    ldr x8, [sp, #16]
+; CHECK-NEXT:    st1w { z16.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast float * %ptr to <vscale x 4 x float>*
+  store volatile <vscale x 4 x float> %s8, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s9, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s10, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s11, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s12, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s13, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s14, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s15, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s16, <vscale x 4 x float>* %ptr1.bc
+  ret void
+}
+
 declare float @callee1(float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
 declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, <vscale x 8 x double>, <vscale x 8 x double>)
 declare float @callee3(float, float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
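Note on the mechanism, with a minimal sketch that is not part of the patch: when LowerFormalArguments assigns a scalable-vector or predicate formal argument to a ZPR/PPR register class, it now records setSVE(true) on AArch64FunctionInfo, so AArch64RegisterInfo::hasSVEArgsOrReturn can consult the cached flag (plus the return type) instead of re-scanning F.args(). The function name below is hypothetical:

; Lowering %v selects the ZPR register class, which sets the cached IsSVE
; flag; hasSVEArgsOrReturn() then returns true for this function without
; walking the IR argument list.
define <vscale x 4 x i32> @sve_arg_sketch(<vscale x 4 x i32> %v) {
  ret <vscale x 4 x i32> %v
}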