Index: llvm/trunk/lib/Target/X86/X86CallingConv.td =================================================================== --- llvm/trunk/lib/Target/X86/X86CallingConv.td +++ llvm/trunk/lib/Target/X86/X86CallingConv.td @@ -24,6 +24,8 @@ list GPR_16 = []; list GPR_32 = []; list GPR_64 = []; + list FP_CALL = [FP0]; + list FP_RET = [FP0, FP1]; list XMM = []; list YMM = []; list ZMM = []; @@ -90,14 +92,14 @@ // TODO: Handle the case of mask types (v*i1) CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>, - // TODO: Handle the case of long double (f80) - CCIfType<[f80], CCCustom<"CC_X86_RegCall_Error">>, - // float, double, float128 --> XMM // In the case of SSE disabled --> save to stack CCIfType<[f32, f64, f128], CCIfSubtarget<"hasSSE1()", CCAssignToReg>>, + // long double --> FP + CCIfType<[f80], CCAssignToReg>, + // __m128, __m128i, __m128d --> XMM // In the case of SSE disabled --> save to stack CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], @@ -129,7 +131,7 @@ // float 128 get stack slots whose size and alignment depends // on the subtarget. - CCIfType<[f128], CCAssignToStack<0, 0>>, + CCIfType<[f80, f128], CCAssignToStack<0, 0>>, // Vectors get 16-byte stack slots that are 16-byte aligned. CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], @@ -166,7 +168,7 @@ CCIfType<[v8i1, v16i1, v32i1], CCCustom<"CC_X86_RegCall_Error">>, // long double --> FP - CCIfType<[f80], CCAssignToReg<[FP0]>>, + CCIfType<[f80], CCAssignToReg>, // float, double, float128 --> XMM CCIfType<[f32, f64, f128], Index: llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp +++ llvm/trunk/lib/Target/X86/X86FloatingPoint.cpp @@ -206,6 +206,13 @@ RegMap[Reg] = StackTop++; } + // popReg - Pop a register from the stack. 
+ void popReg() { + if (StackTop == 0) + report_fatal_error("Cannot pop empty stack!"); + RegMap[Stack[--StackTop]] = ~0; // Update state + } + bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); @@ -329,6 +336,25 @@ df_iterator_default_set Processed; MachineBasicBlock *Entry = &MF.front(); + LiveBundle &Bundle = + LiveBundles[Bundles->getBundle(Entry->getNumber(), false)]; + + // In regcall convention, some FP registers may not be passed through + // the stack, so they will need to be assigned to the stack first. + if ((Entry->getParent()->getFunction()->getCallingConv() == + CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) { + // In the register calling convention, up to one FP argument could be + // saved in the first FP register. + // If Bundle.Mask is non-zero and Bundle.FixCount is zero, it means + // that the FP registers contain arguments. + // The actual value is passed in FP0. + // Here we fix the stack and mark FP0 as a pre-assigned register. + assert((Bundle.Mask & 0xFE) == 0 && + "Only FP0 could be passed as an argument"); + Bundle.FixCount = 1; + Bundle.FixStack[0] = 0; + } + bool Changed = false; for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed)) Changed |= processBasicBlock(MF, *BB); @@ -791,9 +817,8 @@ MachineInstr &MI = *I; const DebugLoc &dl = MI.getDebugLoc(); ASSERT_SORTED(PopTable); - if (StackTop == 0) - report_fatal_error("Cannot pop empty stack!"); - RegMap[Stack[--StackTop]] = ~0; // Update state + + popReg(); // Check to see if there is a popping version of this instruction... 
int Opcode = Lookup(PopTable, I->getOpcode()); @@ -929,6 +954,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) { unsigned STReturns = 0; + const MachineFunction* MF = I->getParent()->getParent(); for (const auto &MO : I->operands()) { if (!MO.isReg()) @@ -937,7 +963,10 @@ unsigned R = MO.getReg() - X86::FP0; if (R < 8) { - assert(MO.isDef() && MO.isImplicit()); + if (MF->getFunction()->getCallingConv() != CallingConv::X86_RegCall) { + assert(MO.isDef() && MO.isImplicit()); + } + STReturns |= 1 << R; } } @@ -945,9 +974,15 @@ unsigned N = countTrailingOnes(STReturns); // FP registers used for function return must be consecutive starting at - // FP0. + // FP0 assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2)); + // Reset the FP Stack - It is required because of possible leftovers from + // passed arguments. The caller should assume that the FP stack is + // returned empty (unless the callee returns values on FP stack). + while (StackTop > 0) + popReg(); + for (unsigned I = 0; I < N; ++I) pushReg(N - I - 1); } Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -2816,6 +2816,8 @@ RC = &X86::FR32RegClass; else if (RegVT == MVT::f64) RC = &X86::FR64RegClass; + else if (RegVT == MVT::f80) + RC = &X86::RFP80RegClass; else if (RegVT == MVT::f128) RC = &X86::FR128RegClass; else if (RegVT.is512BitVector()) Index: llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -246,6 +246,44 @@ ret double %d } +; X32: test_argRetf80 +; X32-NOT: fldt +; X32: fadd %st(0), %st(0) +; X32: retl + +; WIN64: test_argRetf80 +; WIN64-NOT: fldt +; WIN64: fadd %st(0), %st(0) +; WIN64: retq + +; Test regcall when receiving/returning 
long double +define x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %a0) nounwind { + %r0 = fadd x86_fp80 %a0, %a0 + ret x86_fp80 %r0 +} + +; X32: test_CallargRetf80 +; X32-NOT: fldt +; X32: fadd %st({{[0-7]}}), %st({{[0-7]}}) +; X32: call{{.*}} {{.*}}test_argRetf80 +; X32: fadd{{.*}} %st({{[0-7]}}) +; X32: retl + +; WIN64: test_CallargRetf80 +; WIN64-NOT: fldt +; WIN64: fadd %st({{[0-7]}}), %st({{[0-7]}}) +; WIN64: call{{.*}} {{.*}}test_argRetf80 +; WIN64: fadd{{.*}} %st({{[0-7]}}) +; WIN64: retq + +; Test regcall when passing/retrieving long double +define x86_regcallcc x86_fp80 @test_CallargRetf80(x86_fp80 %a) { + %b = fadd x86_fp80 %a, %a + %c = call x86_regcallcc x86_fp80 @test_argRetf80(x86_fp80 %b) + %d = fadd x86_fp80 %c, %c + ret x86_fp80 %d +} + ; X32-LABEL: test_argRetPointer: ; X32: incl %eax ; X32: ret{{.*}}