diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -166,7 +166,8 @@ // Vararg functions on windows pass floats in integer registers let Entry = 1 in def CC_AArch64_Win64_VarArg : CallingConv<[ - CCIfType<[f16, bf16, f32], CCPromoteToType<f64>>, + CCIfType<[f16, bf16], CCBitConvertToType<i16>>, + CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[f64], CCBitConvertToType<i64>>, CCDelegateTo<CC_AArch64_AAPCS> ]>; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4732,7 +4732,10 @@ else if (ActualMVT == MVT::i16) ValVT = MVT::i16; } - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); + bool UseVarArgCC = false; + if (IsWin64) + UseVarArgCC = isVarArg; + CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC); bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo); assert(!Res && "Call operand has unhandled type"); @@ -5362,6 +5365,8 @@ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; bool IsSibCall = false; + bool IsWin64 = + Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()); // Check callee args/returns for SVE registers and set calling convention // accordingly. @@ -5411,8 +5416,12 @@ "currently not supported"); ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, - /*IsVarArg=*/ !Outs[i].IsFixed); + bool UseVarArgCC = !Outs[i].IsFixed; + // On Windows, the fixed arguments in a vararg call are passed in GPRs + // too, so use the vararg CC to force them to integer registers.
+ if (IsWin64) + UseVarArgCC = true; + CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC); bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); assert(!Res && "Call operand has unhandled type"); (void)Res; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -164,11 +164,17 @@ struct OutgoingArgHandler : public CallLowering::OutgoingValueHandler { OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn, - CCAssignFn *AssignFnVarArg, bool IsTailCall = false, - int FPDiff = 0) + CCAssignFn *AssignFnVarArg, bool IsVarArg, + bool IsTailCall = false, int FPDiff = 0) : OutgoingValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff), - StackSize(0), SPReg(0) {} + StackSize(0), SPReg(0) { + MachineFunction &MF = MIRBuilder.getMF(); + const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + bool IsWin = + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); + UseVarArgsCCForFixed = IsVarArg && IsWin; + } Register getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO, @@ -240,7 +246,7 @@ ISD::ArgFlagsTy Flags, CCState &State) override { bool Res; - if (Info.IsFixed) + if (Info.IsFixed && !UseVarArgsCCForFixed) Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State); else Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State); @@ -252,6 +258,7 @@ MachineInstrBuilder MIB; CCAssignFn *AssignFnVarArg; bool IsTailCall; + bool UseVarArgsCCForFixed; /// For tail calls, the byte offset of the call's argument area from the /// callee's. Unused elsewhere.
@@ -376,7 +383,8 @@ splitToValueTypes(CurArgInfo, SplitArgs, DL, CC); } - OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn); + OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFn, AssignFn, + F.isVarArg()); Success = handleAssignments(MIRBuilder, SplitArgs, Handler, CC, F.isVarArg()); } @@ -879,7 +887,7 @@ // Do the actual argument marshalling. OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, - AssignFnVarArg, true, FPDiff); + AssignFnVarArg, Info.IsVarArg, true, FPDiff); if (!handleAssignments(MIRBuilder, OutArgs, Handler, CalleeCC, Info.IsVarArg)) return false; @@ -991,7 +999,7 @@ // Do the actual argument marshalling. OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed, - AssignFnVarArg, false); + AssignFnVarArg, Info.IsVarArg, false); if (!handleAssignments(MIRBuilder, OutArgs, Handler, Info.CallConv, Info.IsVarArg)) return false; diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-tail-call.ll @@ -181,9 +181,9 @@ ; WINDOWS: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; WINDOWS: $w0 = COPY [[C]](s32) - ; WINDOWS: $d0 = COPY [[C1]](s64) - ; WINDOWS: $x1 = COPY [[C2]](s64) - ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1 + ; WINDOWS: $x1 = COPY [[C1]](s64) + ; WINDOWS: $x2 = COPY [[C2]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2 tail call void(i32, double, i64, ...) 
@varargs(i32 42, double 1.0, i64 12) ret void } @@ -217,10 +217,10 @@ ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314 ; WINDOWS: $w0 = COPY [[C]](s32) - ; WINDOWS: $d0 = COPY [[C1]](s64) - ; WINDOWS: $x1 = COPY [[C2]](s64) - ; WINDOWS: $x2 = COPY [[C3]](s64) - ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2 + ; WINDOWS: $x1 = COPY [[C1]](s64) + ; WINDOWS: $x2 = COPY [[C2]](s64) + ; WINDOWS: $x3 = COPY [[C3]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3 tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12, i64 314) ret void } @@ -276,10 +276,10 @@ ; WINDOWS: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 ; WINDOWS: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 314 ; WINDOWS: $w0 = COPY [[C]](s32) - ; WINDOWS: $d0 = COPY [[C1]](s64) - ; WINDOWS: $x1 = COPY [[C2]](s64) - ; WINDOWS: $x2 = COPY [[C3]](s64) - ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $d0, implicit $x1, implicit $x2 + ; WINDOWS: $x1 = COPY [[C1]](s64) + ; WINDOWS: $x2 = COPY [[C2]](s64) + ; WINDOWS: $x3 = COPY [[C3]](s64) + ; WINDOWS: TCRETURNdi @varargs, 0, csr_aarch64_aapcs, implicit $sp, implicit $w0, implicit $x1, implicit $x2, implicit $x3 tail call void(i32, double, i64, ...) 
@varargs(i32 42, double 1.0, i64 12, i64 314) ret void } diff --git a/llvm/test/CodeGen/AArch64/win64_vararg_float.ll b/llvm/test/CodeGen/AArch64/win64_vararg_float.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/win64_vararg_float.ll @@ -0,0 +1,108 @@ +; RUN: llc < %s -mtriple=aarch64-windows -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,DAGISEL +; RUN: llc < %s -mtriple=aarch64-windows -verify-machineinstrs -O0 -fast-isel | FileCheck %s --check-prefixes=CHECK,O0 +; RUN: llc < %s -mtriple=aarch64-windows -verify-machineinstrs -O0 -global-isel | FileCheck %s --check-prefixes=CHECK,O0 + +define void @float_va_fn(float %a, i32 %b, ...) nounwind { +entry: +; CHECK-LABEL: float_va_fn: +; O0: str x7, [sp, #72] +; O0: str x6, [sp, #64] +; O0: str x5, [sp, #56] +; O0: str x4, [sp, #48] +; O0: str x3, [sp, #40] +; O0: str x2, [sp, #32] +; CHECK: fmov s0, w0 +; O0: add x8, sp, #32 +; O0: str x8, [sp, #8] +; O0: ldr x0, [sp, #8] +; DAGISEL: add x0, sp, #16 +; DAGISEL: stp x2, x3, [sp, #16] +; DAGISEL: stp x4, x5, [sp, #32] +; DAGISEL: stp x6, x7, [sp, #48] +; CHECK: bl f_va_list + %ap = alloca i8*, align 8 + %0 = bitcast i8** %ap to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) + call void @llvm.va_start(i8* nonnull %0) + %1 = load i8*, i8** %ap, align 8 + call void @f_va_list(float %a, i8* %1) + call void @llvm.va_end(i8* nonnull %0) + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) +declare void @llvm.va_start(i8*) +declare void @f_va_list(float, i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +define void @double_va_fn(double %a, i32 %b, ...) 
nounwind { +entry: +; CHECK-LABEL: double_va_fn: +; O0: str x7, [sp, #72] +; O0: str x6, [sp, #64] +; O0: str x5, [sp, #56] +; O0: str x4, [sp, #48] +; O0: str x3, [sp, #40] +; O0: str x2, [sp, #32] +; CHECK: fmov d0, x0 +; O0: add x8, sp, #32 +; O0: str x8, [sp, #8] +; O0: ldr x0, [sp, #8] +; DAGISEL: add x0, sp, #16 +; DAGISEL: stp x2, x3, [sp, #16] +; DAGISEL: stp x4, x5, [sp, #32] +; DAGISEL: stp x6, x7, [sp, #48] +; CHECK: bl d_va_list + %ap = alloca i8*, align 8 + %0 = bitcast i8** %ap to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) + call void @llvm.va_start(i8* nonnull %0) + %1 = load i8*, i8** %ap, align 8 + call void @d_va_list(double %a, i8* %1) + call void @llvm.va_end(i8* nonnull %0) + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) + ret void +} + +declare void @d_va_list(double, i8*) + +define void @call_f_va() nounwind { +entry: +; CHECK-LABEL: call_f_va: +; DAGISEL: mov w0, #1065353216 +; FASTISEL: mov w0, #1065353216 +; GISEL: fmov s0, #1.00000000 +; GISEL: fmov w0, s0 +; CHECK: mov w1, #2 +; DAGISEL: mov x2, #4613937818241073152 +; FASTISEL: mov x2, #4613937818241073152 +; GISEL: fmov d0, #3.00000000 +; GISEL: fmov x2, d0 +; CHECK: mov w3, #4 +; CHECK: b other_f_va_fn + tail call void (float, i32, ...) @other_f_va_fn(float 1.000000e+00, i32 2, double 3.000000e+00, i32 4) + ret void +} + +declare void @other_f_va_fn(float, i32, ...) + +define void @call_d_va() nounwind { +entry: +; CHECK-LABEL: call_d_va: +; DAGISEL: mov x0, #4607182418800017408 +; FASTISEL: mov x0, #4607182418800017408 +; GISEL: fmov d0, #1.00000000 +; GISEL: fmov x0, d0 +; CHECK: mov w1, #2 +; DAGISEL: mov x2, #4613937818241073152 +; FASTISEL: mov x2, #4613937818241073152 +; GISEL: fmov d0, #3.00000000 +; CHECK: mov w3, #4 +; CHECK: b other_d_va_fn + tail call void (double, i32, ...) @other_d_va_fn(double 1.000000e+00, i32 2, double 3.000000e+00, i32 4) #4 + ret void +} + +declare dso_local void @other_d_va_fn(double, i32, ...)