Index: lib/Target/X86/X86CallingConv.td =================================================================== --- lib/Target/X86/X86CallingConv.td +++ lib/Target/X86/X86CallingConv.td @@ -24,6 +24,7 @@ list GPR_16 = []; list GPR_32 = []; list GPR_64 = []; + list FP = [FP0]; list XMM = []; list YMM = []; list ZMM = []; @@ -85,14 +86,14 @@ // long long, __int64 --> GPR CCIfType<[i64], CCAssignToReg>, - // TODO: Handle the case of long double (f80) - CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>, - // float, double, float128 --> XMM // In the case of SSE disabled --> save to stack CCIfType<[f32, f64, f128], CCIfSubtarget<"hasSSE1()", CCAssignToReg>>, + // long double --> FP + CCIfType<[f80], CCAssignToReg>, + // __m128, __m128i, __m128d --> XMM // In the case of SSE disabled --> save to stack CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], @@ -124,7 +125,7 @@ // float 128 get stack slots whose size and alignment depends // on the subtarget. - CCIfType<[f128], CCAssignToStack<0, 0>>, + CCIfType<[f80, f128], CCAssignToStack<0, 0>>, // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], @@ -156,7 +157,7 @@ CCIfType<[i64], CCAssignToReg>, // long double --> FP - CCIfType<[f80], CCAssignToReg<[FP0]>>, + CCIfType<[f80], CCAssignToReg>, // float, double, float128 --> XMM CCIfType<[f32, f64, f128], Index: lib/Target/X86/X86FloatingPoint.cpp =================================================================== --- lib/Target/X86/X86FloatingPoint.cpp +++ lib/Target/X86/X86FloatingPoint.cpp @@ -467,7 +467,7 @@ << " derived from " << MBB->getName() << ".\n"); StackTop = 0; // Get the live-in bundle for MBB. - const LiveBundle &Bundle = + LiveBundle &Bundle = LiveBundles[Bundles->getBundle(MBB->getNumber(), false)]; if (!Bundle.Mask) { @@ -476,7 +476,23 @@ } // Depth-first iteration should ensure that we always have an assigned stack. 
- assert(Bundle.isFixed() && "Reached block before any predecessors"); + // However, in the regcall convention some FP registers may not be passed + // through the stack, so they need to be assigned to the stack first. + if (MBB->getParent()->getFunction()->getCallingConv() != + CallingConv::X86_RegCall) { + assert(Bundle.isFixed() && "Reached block before any predecessors"); + } else if (Bundle.Mask && !Bundle.FixCount) { + // In the register calling convention, up to one FP argument could be + // saved in the first FP register. + // If Bundle.Mask is non-zero and Bundle.FixCount is zero, it means + // that the FP registers contain arguments. + // The actual value is passed in FP0. + // Here we fix the stack and mark FP0 as a pre-assigned register. + assert((Bundle.Mask & 0xFE) == 0 && + "Only FP0 could be passed as an argument"); + Bundle.FixCount = 1; + Bundle.FixStack[0] = Stack[0]; + } // Push the fixed live-in registers. for (unsigned i = Bundle.FixCount; i > 0; --i) { @@ -937,7 +953,14 @@ unsigned R = MO.getReg() - X86::FP0; if (R < 8) { - assert(MO.isDef() && MO.isImplicit()); + const MachineFunction* MF = MO.getParent()->getParent()->getParent(); + if (MF->getFunction()->getCallingConv() + != CallingConv::X86_RegCall) + { + assert(MO.isDef() && MO.isImplicit()); + assert(StackTop == 0 && "No arguments should be on the stack"); + } + STReturns |= 1 << R; } } @@ -945,8 +968,9 @@ unsigned N = countTrailingOnes(STReturns); // FP registers used for function return must be consecutive starting at - // FP0. + // FP0. So we assume the FP stack is empty. 
assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2)); + StackTop = 0; for (unsigned I = 0; I < N; ++I) pushReg(N - I - 1); Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2647,6 +2647,8 @@ RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; + else if (RegVT == MVT::f80) + RC = &X86::RFP80RegClass; else if (RegVT == MVT::f128) RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) Index: test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -246,6 +246,44 @@ ret double %d } +; X32-LABEL: test_argRetf80: +; X32-NOT: fldt +; X32: fadd %st(0), %st(0) +; X32: retl + +; WIN64-LABEL: test_argRetf80: +; WIN64-NOT: fldt +; WIN64: fadd %st(0), %st(0) +; WIN64: retq + +; Test regcall when receiving/returning long double +define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind { + %r0 = fadd x86_fp80 %a0, %a0 + ret x86_fp80 %r0 +} + +; X32-LABEL: test_CallargRetf80: +; X32-NOT: fldt +; X32: fadd %st({{[0-7]}}), %st({{[0-7]}}) +; X32: call{{.*}} {{.*}}test_argRetf80 +; X32: fadd{{.*}} %st({{[0-7]}}) +; X32: retl + +; WIN64-LABEL: test_CallargRetf80: +; WIN64-NOT: fldt +; WIN64: fadd %st({{[0-7]}}), %st({{[0-7]}}) +; WIN64: call{{.*}} {{.*}}test_argRetf80 +; WIN64: fadd{{.*}} %st({{[0-7]}}) +; WIN64: retq + +; Test regcall when passing/retrieving long double +define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { + %b = fadd x86_fp80 %a, %a + %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b) + %d = fadd x86_fp80 %c, %c + ret x86_fp80 %d +} + ; X32-LABEL: test_argRetPointer: ; X32: incl %eax ; X32: ret{{.*}}