diff --git a/llvm/lib/Target/X86/X86CallLowering.h b/llvm/lib/Target/X86/X86CallLowering.h
--- a/llvm/lib/Target/X86/X86CallLowering.h
+++ b/llvm/lib/Target/X86/X86CallLowering.h
@@ -36,6 +36,10 @@
 
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
+
+  bool canLowerReturn(MachineFunction &MF, CallingConv::ID CallConv,
+                      SmallVectorImpl<BaseArgInfo> &Outs,
+                      bool IsVarArg) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/LowLevelType.h"
@@ -129,15 +130,29 @@
 
 } // end anonymous namespace
 
+bool X86CallLowering::canLowerReturn(
+    MachineFunction &MF, CallingConv::ID CallConv,
+    SmallVectorImpl<BaseArgInfo> &Outs, bool IsVarArg) const {
+  LLVMContext &Context = MF.getFunction().getContext();
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
+  return checkReturn(CCInfo, Outs, RetCC_X86);
+}
+
 bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                   const Value *Val, ArrayRef<Register> VRegs,
                                   FunctionLoweringInfo &FLI) const {
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
+  MachineFunction &MF = MIRBuilder.getMF();
   auto MIB = MIRBuilder.buildInstrNoInsert(X86::RET).addImm(0);
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  bool Is64Bit = STI.is64Bit();
 
-  if (!VRegs.empty()) {
-    MachineFunction &MF = MIRBuilder.getMF();
+  if (!FLI.CanLowerReturn) {
+    insertSRetStores(MIRBuilder, Val->getType(), VRegs, FLI.DemoteRegister);
+    MIRBuilder.buildCopy(Is64Bit ? X86::RAX : X86::EAX, FLI.DemoteRegister);
+  } else if (!VRegs.empty()) {
     const Function &F = MF.getFunction();
     MachineRegisterInfo &MRI = MF.getRegInfo();
     const DataLayout &DL = MF.getDataLayout();
@@ -238,18 +253,19 @@
                                            const Function &F,
                                            ArrayRef<ArrayRef<Register>> VRegs,
                                            FunctionLoweringInfo &FLI) const {
-  if (F.arg_empty())
-    return true;
-
-  // TODO: handle variadic function
-  if (F.isVarArg())
-    return false;
-
   MachineFunction &MF = MIRBuilder.getMF();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   auto DL = MF.getDataLayout();
 
   SmallVector<ArgInfo, 8> SplitArgs;
+
+  if (!FLI.CanLowerReturn)
+    insertSRetIncomingArgument(F, SplitArgs, FLI.DemoteRegister, MRI, DL);
+
+  // TODO: handle variadic function
+  if (F.isVarArg())
+    return false;
+
   unsigned Idx = 0;
   for (const auto &Arg : F.args()) {
     // TODO: handle not simple cases.
@@ -267,6 +283,9 @@
     Idx++;
   }
 
+  if (SplitArgs.empty())
+    return true;
+
   MachineBasicBlock &MBB = MIRBuilder.getMBB();
   if (!MBB.empty())
     MIRBuilder.setInstr(*MBB.begin());
@@ -363,7 +382,7 @@
   // symmetry with the arguments, the physical register must be an
   // implicit-define of the call instruction.
 
-  if (!Info.OrigRet.Ty->isVoidTy()) {
+  if (Info.CanLowerReturn && !Info.OrigRet.Ty->isVoidTy()) {
     if (Info.OrigRet.Regs.size() > 1)
       return false;
 
@@ -391,5 +410,9 @@
       .addImm(Assigner.getStackSize())
       .addImm(0 /* NumBytesForCalleeToPop */);
 
+  if (!Info.CanLowerReturn)
+    insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
+                    Info.DemoteRegister, Info.DemoteStackIndex);
+
   return true;
 }
diff --git a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
--- a/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/irtranslator-callingconv.ll
@@ -716,3 +716,60 @@
   call void (ptr, ...) @variadic_callee(ptr %addr, double %val)
   ret void
 }
+
+; Return value is in memory unless subtarget is AVX or higher.
+define <32 x float> @test_return_v32f32() {
+  ; X86-LABEL: name: test_return_v32f32
+  ; X86: bb.1 (%ir-block.0):
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; X86-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; X86-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>))
+  ; X86-NEXT:   $eax = COPY [[LOAD]](p0)
+  ; X86-NEXT:   RET 0
+  ; X64-LABEL: name: test_return_v32f32
+  ; X64: bb.1 (%ir-block.0):
+  ; X64-NEXT:   liveins: $rdi
+  ; X64-NEXT: {{  $}}
+  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
+  ; X64-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
+  ; X64-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+  ; X64-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[COPY]](p0) :: (store (<32 x s32>))
+  ; X64-NEXT:   $rax = COPY [[COPY]](p0)
+  ; X64-NEXT:   RET 0
+  ret <32 x float> zeroinitializer
+}
+
+define float @test_call_v32f32() {
+  ; X86-LABEL: name: test_call_v32f32
+  ; X86: bb.1 (%ir-block.0):
+  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
+  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
+  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
+  ; X86-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+  ; X86-NEXT:   CALLpcrel32 @test_return_v32f32, csr_32, implicit $esp, implicit $ssp
+  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
+  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
+  ; X86-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s32)
+  ; X86-NEXT:   $fp0 = COPY [[EVEC]](s32)
+  ; X86-NEXT:   RET 0, implicit $fp0
+  ; X64-LABEL: name: test_call_v32f32
+  ; X64: bb.1 (%ir-block.0):
+  ; X64-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   $rdi = COPY [[FRAME_INDEX]](p0)
+  ; X64-NEXT:   CALL64pcrel32 @test_return_v32f32, csr_64, implicit $rsp, implicit $ssp, implicit $rdi
+  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
+  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
+  ; X64-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s64)
+  ; X64-NEXT:   $xmm0 = COPY [[EVEC]](s32)
+  ; X64-NEXT:   RET 0, implicit $xmm0
+  %vect = call <32 x float> @test_return_v32f32()
+  %elt = extractelement <32 x float> %vect, i32 7
+  ret float %elt
+}
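
Note, for context only (not part of the patch): at the IR level, the sret demotion exercised by these tests is equivalent to hand-rewriting the large vector return as a hidden pointer parameter. A minimal hand-written sketch follows; the function names are hypothetical and not taken from the diff.

  ; When the return value does not fit the return-register convention (per the
  ; test comment, <32 x float> without AVX), the caller passes a hidden pointer
  ; and the callee stores the result through it.
  define void @return_v32f32_demoted(ptr sret(<32 x float>) %out) {
    store <32 x float> zeroinitializer, ptr %out
    ret void
  }

  define float @call_v32f32_demoted() {
    %buf = alloca <32 x float>
    call void @return_v32f32_demoted(ptr sret(<32 x float>) %buf)
    %vec = load <32 x float>, ptr %buf
    %elt = extractelement <32 x float> %vec, i32 7
    ret float %elt
  }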