Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -108,6 +108,7 @@
   bool selectDivScale(MachineInstr &MI) const;
   bool selectIntrinsicIcmp(MachineInstr &MI) const;
   bool selectBallot(MachineInstr &I) const;
+  bool selectReturnAddress(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -902,6 +902,8 @@
     return selectIntrinsicIcmp(I);
   case Intrinsic::amdgcn_ballot:
     return selectBallot(I);
+  case Intrinsic::returnaddress:
+    return selectReturnAddress(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1082,6 +1084,45 @@
   return true;
 }
 
+bool AMDGPUInstructionSelector::selectReturnAddress(MachineInstr &I) const {
+  MachineBasicBlock *MBB = I.getParent();
+  MachineFunction &MF = *MBB->getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+
+  MachineOperand &Dst = I.getOperand(0);
+  Register DstReg = Dst.getReg();
+  unsigned Depth = I.getOperand(2).getImm();
+
+  const TargetRegisterClass *RC
+    = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
+  if (!RC->hasSubClassEq(&AMDGPU::SGPR_64RegClass) ||
+      !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
+    return false;
+
+  // Check for kernel and shader functions
+  if (Depth != 0 ||
+      MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
+    BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+      .addImm(0);
+    I.eraseFromParent();
+    return true;
+  }
+
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  // There is a call to @llvm.returnaddress in this function
+  MFI.setReturnAddressIsTaken(true);
+
+  // Get the return address reg and mark it as an implicit live-in
+  Register ReturnAddrReg = TRI.getReturnAddressReg(MF);
+  Register LiveIn = getFunctionLiveInPhysReg(MF, TII, ReturnAddrReg,
+                                             AMDGPU::SReg_64RegClass);
+  BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+    .addReg(LiveIn);
+  I.eraseFromParent();
+
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4021,7 +4021,8 @@
     return getDefaultMappingAllVGPR(MI);
   case Intrinsic::amdgcn_kernarg_segment_ptr:
   case Intrinsic::amdgcn_s_getpc:
-  case Intrinsic::amdgcn_groupstaticsize: {
+  case Intrinsic::amdgcn_groupstaticsize:
+  case Intrinsic::returnaddress: {
     unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
     OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
     break;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-returnaddress.mir
@@ -0,0 +1,122 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: return_address_already_live_in_copy
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr30_sgpr31', virtual-reg: '%0' }
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_live_in_copy
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+    %0:sgpr(p0) = COPY $sgpr30_sgpr31
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_already_block_live_in_copy_not_mf_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr30_sgpr31
+    ; CHECK-LABEL: name: return_address_already_block_live_in_copy_not_mf_live_in
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY1]], implicit [[COPY]]
+    %0:sgpr(p0) = COPY $sgpr30_sgpr31
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_no_live_in
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+
+    ; CHECK-LABEL: name: return_address_no_live_in
+    ; CHECK: liveins: $sgpr30_sgpr31
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+    ; CHECK: S_ENDPGM 0, implicit [[COPY]]
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_no_live_in_non_entry_block
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: return_address_no_live_in_non_entry_block
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   S_ENDPGM 0, implicit [[COPY]]
+  bb.0:
+    G_BR %bb.1
+
+  bb.1:
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
+
+---
+name: return_address_multi_use
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  ; CHECK-LABEL: name: return_address_multi_use
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31
+  ; CHECK:   S_BRANCH %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   S_ENDPGM 0, implicit [[COPY]], implicit [[COPY]]
+  bb.0:
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    G_BR %bb.1
+
+  bb.1:
+    %1:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0, implicit %1
+...
+
+---
+name: return_address_kernel_is_null
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: return_address_kernel_is_null
+    ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; CHECK: S_ENDPGM 0, implicit [[S_MOV_B64_]]
+    %0:sgpr(p0) = G_INTRINSIC intrinsic(@llvm.returnaddress), 0
+    S_ENDPGM 0, implicit %0
+...
Index: llvm/test/CodeGen/AMDGPU/returnaddress.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/returnaddress.ll
+++ llvm/test/CodeGen/AMDGPU/returnaddress.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -global-isel -amdgpu-fixed-function-abi -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 
 ; Test with zero frame
 ; GCN-LABEL: {{^}}func1
@@ -25,7 +26,7 @@
 ; Test with amdgpu_kernel
 ; GCN-LABEL: {{^}}func3
 ; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
 define amdgpu_kernel void @func3(i8** %out) nounwind {
 entry:
   %tmp = tail call i8* @llvm.returnaddress(i32 0)
@@ -36,7 +37,7 @@
 ; Test with use outside the entry-block
 ; GCN-LABEL: {{^}}func4
 ; GCN: v_mov_b32_e32 v0, 0
-; GCN: v_mov_b32_e32 v1, v0
+; GCN: v_mov_b32_e32 v1, {{v0|0}}
 define amdgpu_kernel void @func4(i8** %out, i32 %val) nounwind {
 entry:
   %cmp = icmp ne i32 %val, 0
@@ -61,5 +62,22 @@
   unreachable
 }
 
+declare void @callee()
+
+; GCN-LABEL: {{^}}multi_use:
+; GCN-DAG: v_mov_b32_e32 v[[LO:4[0-9]+]], s30
+; GCN-DAG: v_mov_b32_e32 v[[HI:4[0-9]+]], s31
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+; GCN: s_swappc_b64
+; GCN: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define void @multi_use() nounwind {
+entry:
+  %ret0 = tail call i8* @llvm.returnaddress(i32 0)
+  store volatile i8* %ret0, i8* addrspace(1)* undef
+  call void @callee()
+  %ret1 = tail call i8* @llvm.returnaddress(i32 0)
+  store volatile i8* %ret1, i8* addrspace(1)* undef
+  ret void
+}
 declare i8* @llvm.returnaddress(i32) nounwind readnone
 