diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -68,9 +68,19 @@ bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) { const Function &F = MF->getFunction(); + unsigned ZRegsUsed = 0; return isa(F.getReturnType()) || - any_of(F.args(), [](const Argument &Arg) { - return isa(Arg.getType()); + any_of(F.args(), [&ZRegsUsed](const Argument &Arg) { + auto Ty = Arg.getType(); + if (isa(Ty)) { + if (cast(Ty)->getElementType() == + Type::getInt1Ty(Arg.getContext())) + return true; + return ZRegsUsed++ < 8; + } + if (Ty->isFloatTy()) + ZRegsUsed++; + return false; }); } diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -188,6 +188,193 @@ ret double %x0 } +; Use AAVPCS, SVE register in z0-z7 used + +define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, i32 * %ptr) nounwind { +; CHECK-LABEL: aavpcs1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x8, x9, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8] +; CHECK-NEXT: ld1w { z24.s }, p0/z, [x7] +; CHECK-NEXT: st1w { z0.s }, p0, [x9] +; CHECK-NEXT: st1w { z1.s }, p0, [x9] +; CHECK-NEXT: st1w { z2.s }, p0, [x9] +; CHECK-NEXT: st1w { z4.s }, p0, [x9] +; CHECK-NEXT: st1w { z5.s }, p0, [x9] +; CHECK-NEXT: st1w { z6.s }, p0, [x9] +; CHECK-NEXT: st1w { z7.s }, p0, [x9] +; CHECK-NEXT: st1w { z24.s }, p0, [x9] +; CHECK-NEXT: st1w { z3.s }, p0, [x9] +; CHECK-NEXT: ret +entry: + %ptr1.bc = bitcast i32 * %ptr to * + store volatile %s7, * %ptr1.bc + store volatile %s8, * %ptr1.bc + store volatile %s9, * %ptr1.bc + store volatile %s11, * %ptr1.bc + store volatile %s12, * %ptr1.bc + store volatile %s13, * %ptr1.bc + store volatile %s14, * %ptr1.bc + store volatile %s15, * %ptr1.bc + store volatile %s16, * %ptr1.bc + ret void +} + +; Use AAVPCS, SVE register in z0-z7 used + +define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16,float * %ptr) nounwind { +; CHECK-LABEL: aavpcs2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x8, x9, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x5] +; CHECK-NEXT: ld1w { z4.s }, p0/z, [x4] +; CHECK-NEXT: ld1w { z5.s }, p0/z, [x3] +; CHECK-NEXT: ld1w { z6.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z24.s }, p0/z, [x0] +; CHECK-NEXT: st1w { z7.s }, p0, [x9] +; CHECK-NEXT: st1w { z24.s }, p0, [x9] +; CHECK-NEXT: st1w { z6.s }, p0, [x9] +; CHECK-NEXT: st1w { z5.s }, p0, [x9] +; CHECK-NEXT: st1w { z4.s }, p0, [x9] +; CHECK-NEXT: st1w { z3.s }, p0, [x9] +; CHECK-NEXT: st1w { z2.s }, p0, [x9] +; CHECK-NEXT: st1w { z1.s }, p0, [x9] +; CHECK-NEXT: st1w { z0.s }, p0, [x9] +; CHECK-NEXT: ret +entry: + %ptr1.bc = bitcast float * %ptr to * + store volatile %s7, * %ptr1.bc + store volatile %s8, * %ptr1.bc + store volatile %s9, * %ptr1.bc + store volatile %s11, * %ptr1.bc + store volatile %s12, * %ptr1.bc + store volatile %s13, * %ptr1.bc + store volatile %s14, * %ptr1.bc + store volatile %s15, * %ptr1.bc + store volatile %s16, * %ptr1.bc + ret void +} + +; Use AAVPCS, no SVE register in z0-z7 used (floats occupy z0-z7) but predicate arg is used + +define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, %s17, %p0, float * %ptr) nounwind { +; CHECK-LABEL: aavpcs3: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x8, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x5] +; CHECK-NEXT: ld1w { z4.s }, p0/z, [x4] +; CHECK-NEXT: ld1w { z5.s }, p0/z, [x3] +; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2] +; CHECK-NEXT: ld1w { z7.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z24.s }, p0/z, [x0] +; CHECK-NEXT: ldr x8, [sp, #16] +; CHECK-NEXT: st1w { z24.s }, p0, [x8] +; CHECK-NEXT: st1w { z7.s }, p0, [x8] +; CHECK-NEXT: st1w { z6.s }, p0, [x8] +; CHECK-NEXT: st1w { z5.s }, p0, [x8] +; CHECK-NEXT: st1w { z4.s }, p0, [x8] +; CHECK-NEXT: st1w { z3.s }, p0, [x8] +; CHECK-NEXT: st1w { z2.s }, p0, [x8] +; CHECK-NEXT: st1w { z1.s }, p0, [x8] +; CHECK-NEXT: st1w { z0.s }, p0, [x8] +; CHECK-NEXT: ret +entry: + %ptr1.bc = bitcast float * %ptr to * + store volatile %s8, * %ptr1.bc + store volatile %s9, * %ptr1.bc + store volatile %s10, * %ptr1.bc + store volatile %s11, * %ptr1.bc + store volatile %s12, * %ptr1.bc + store volatile %s13, * %ptr1.bc + store volatile %s14, * %ptr1.bc + store volatile %s15, * %ptr1.bc + store volatile %s16, * %ptr1.bc + ret void +} + +; use AAVPCS, SVE register in z0-z7 used (i32s dont occupy z0-z7) + +define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, %s17, i32 * %ptr) nounwind { +; CHECK-LABEL: aavpcs4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x8, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ldr x9, [sp, #16] +; CHECK-NEXT: ld1w { z24.s }, p0/z, [x8] +; CHECK-NEXT: st1w { z0.s }, p0, [x9] +; CHECK-NEXT: st1w { z1.s }, p0, [x9] +; CHECK-NEXT: st1w { z2.s }, p0, [x9] +; CHECK-NEXT: st1w { z3.s }, p0, [x9] +; CHECK-NEXT: st1w { z4.s }, p0, [x9] +; CHECK-NEXT: st1w { z5.s }, p0, [x9] +; CHECK-NEXT: st1w { z6.s }, p0, [x9] +; CHECK-NEXT: st1w { z7.s }, p0, [x9] +; CHECK-NEXT: st1w { z24.s }, p0, [x9] +; CHECK-NEXT: ret +entry: + %ptr1.bc = bitcast i32 * %ptr to * + store volatile %s8, * %ptr1.bc + store volatile %s9, * %ptr1.bc + store volatile %s10, * %ptr1.bc + store volatile %s11, * %ptr1.bc + store volatile %s12, * %ptr1.bc + store volatile %s13, * %ptr1.bc + store volatile %s14, * %ptr1.bc + store volatile %s15, * %ptr1.bc + store volatile %s16, * %ptr1.bc + ret void +} + +; Use AAPCS, no SVE register in z0-7 used (floats occupy z0-z7) + +define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, %s17, float * %ptr) nounwind { +; CHECK-LABEL: aapcs1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr x8, [sp] +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6] +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x5] +; CHECK-NEXT: ld1w { z4.s }, p0/z, [x4] +; CHECK-NEXT: ld1w { z5.s }, p0/z, [x3] +; CHECK-NEXT: ld1w { z6.s }, p0/z, [x2] +; CHECK-NEXT: ld1w { z7.s }, p0/z, [x1] +; CHECK-NEXT: ld1w { z16.s }, p0/z, [x0] +; CHECK-NEXT: ldr x8, [sp, #16] +; CHECK-NEXT: st1w { z16.s }, p0, [x8] +; CHECK-NEXT: st1w { z7.s }, p0, [x8] +; CHECK-NEXT: st1w { z6.s }, p0, [x8] +; CHECK-NEXT: st1w { z5.s }, p0, [x8] +; CHECK-NEXT: st1w { z4.s }, p0, [x8] +; CHECK-NEXT: st1w { z3.s }, p0, [x8] +; CHECK-NEXT: st1w { z2.s }, p0, [x8] +; CHECK-NEXT: st1w { z1.s }, p0, [x8] +; CHECK-NEXT: st1w { z0.s }, p0, [x8] +; CHECK-NEXT: ret +entry: + %ptr1.bc = bitcast float * %ptr to * + store volatile %s8, * %ptr1.bc + store volatile %s9, * %ptr1.bc + store volatile %s10, * %ptr1.bc + store volatile %s11, * %ptr1.bc + store volatile %s12, * %ptr1.bc + store volatile %s13, * %ptr1.bc + store volatile %s14, * %ptr1.bc + store volatile %s15, * %ptr1.bc + store volatile %s16, * %ptr1.bc + ret void +} + declare float @callee1(float, , , ) declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, , ) declare float @callee3(float, float, , , )