diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -20,12 +20,53 @@
 #include "SIMachineFunctionInfo.h"
 #include "SIRegisterInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/LowLevelTypeImpl.h"
 
 using namespace llvm;
 
+namespace {
+
+/// Handler for outgoing return values: a value assigned to a physical register
+/// is copied into it and the register is added as a use operand of MIB.
+struct OutgoingArgHandler : public CallLowering::ValueHandler {
+  OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+                     MachineInstrBuilder MIB, CCAssignFn *AssignFn)
+      : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {}
+
+  /// Instruction that receives the assigned physical registers as uses.
+  MachineInstrBuilder MIB;
+
+  unsigned getStackAddress(uint64_t Size, int64_t Offset,
+                           MachinePointerInfo &MPO) override {
+    llvm_unreachable("not implemented");
+    return 0;
+  }
+
+  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+                            MachinePointerInfo &MPO, CCValAssign &VA) override {
+    llvm_unreachable("not implemented");
+  }
+
+  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+                        CCValAssign &VA) override {
+    MIB.addUse(PhysReg);
+    MIRBuilder.buildCopy(PhysReg, ValVReg);
+  }
+
+  bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+                 CCValAssign::LocInfo LocInfo,
+                 const CallLowering::ArgInfo &Info,
+                 CCState &State) override {
+    return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State);
+  }
+};
+
+} // end anonymous namespace
+
 AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
   : CallLowering(&TLI) {
 }
@@ -33,11 +74,48 @@
 bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                      const Value *Val,
                                      ArrayRef<unsigned> VRegs) const {
-  // FIXME: Add support for non-void returns.
-  if (Val)
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  MFI->setIfReturnsVoid(!Val);
+
+  // A void return needs no value lowering: just end the program.
+  if (!Val) {
+    MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+    return true;
+  }
+
+  unsigned VReg = VRegs[0];
+
+  const Function &F = MF.getFunction();
+  auto &DL = F.getParent()->getDataLayout();
+  // Value returns are only lowered for shader calling conventions.
+  if (!AMDGPU::isShader(F.getCallingConv()))
+    return false;
+
+
+  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
+  SmallVector<EVT, 4> SplitVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ArgInfo OrigArg{VReg, Val->getType()};
+  setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F);
+  ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+  SmallVector<ArgInfo, 8> SplitArgs;
+  CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false);
+  for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
+    Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext());
+    SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed});
+  }
+  // Build the return detached: the handler emits its copies first, and the
+  // return is inserted only after every assignment has succeeded.
+  auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG);
+  OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn);
+  if (!handleAssignments(MIRBuilder, SplitArgs, Handler))
     return false;
+  MIRBuilder.insertInstr(RetInstr);
 
-  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
@@ -55,6 +55,27 @@
   ret void
 }
 
+; CHECK-LABEL: name: ret_struct
+; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0
+; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr1
+; CHECK: $sgpr0 = COPY [[S0]]
+; CHECK: $sgpr1 = COPY [[S1]]
+; CHECK: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1
+define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) {
+main_body:
+  %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0
+  %tmp1 = insertvalue <{ i32, i32 }> %tmp0, i32 %arg1, 1
+  ret <{ i32, i32 }> %tmp1
+}
+
+; CHECK-LABEL: name: non_void_ret
+; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+; CHECK: $sgpr0 = COPY [[ZERO]]
+; CHECK: SI_RETURN_TO_EPILOG $sgpr0
+define amdgpu_vs i32 @non_void_ret() {
+  ret i32 0
+}
+
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll
deleted file mode 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s
-
-; This isn't implemented, but we need to make sure we fall back to SelectionDAG
-; instead of generating wrong code.
-; CHECK: warning: Instruction selection used fallback path for non_void_ret
-; CHECK: non_void_ret:
-; CHECK-NOT: s_endpgm
-define amdgpu_vs i32 @non_void_ret() {
-  ret i32 0
-}