Index: lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- lib/Target/AMDGPU/SIISelLowering.h +++ lib/Target/AMDGPU/SIISelLowering.h @@ -89,7 +89,7 @@ SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M, SelectionDAG &DAG, ArrayRef<SDValue> Ops, bool IsIntrinsic = false) const; Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -3695,6 +3695,7 @@ switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); case ISD::LOAD: { SDValue Result = LowerLOAD(Op, DAG); assert((!Result.getNode() || @@ -4153,6 +4154,31 @@ return Chain; } +SDValue SITargetLowering::LowerRETURNADDR(SDValue Op, + SelectionDAG &DAG) const { + MVT VT = Op.getSimpleValueType(); + SDLoc DL(Op); + // Checking the depth + if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) + return DAG.getConstant(0, DL, VT); + + MachineFunction &MF = DAG.getMachineFunction(); + const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); + // Check for kernel and shader functions + if (Info->isEntryFunction()) + return DAG.getConstant(0, DL, VT); + + MachineFrameInfo &MFI = MF.getFrameInfo(); + // There is a call to @llvm.returnaddress in this function + MFI.setReturnAddressIsTaken(true); + + const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); + // Get the return address reg and mark it as an implicit live-in + unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT)); + + return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); +}
+ SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, Index: returnadr.diff =================================================================== --- /dev/null +++ returnadr.diff @@ -0,0 +1,127 @@ +diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp +index c2cda5ef4d7..a59b54902b2 100644 +--- a/lib/Target/AMDGPU/SIISelLowering.cpp ++++ b/lib/Target/AMDGPU/SIISelLowering.cpp +@@ -3695,6 +3695,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + switch (Op.getOpcode()) { + default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); ++ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::LOAD: { + SDValue Result = LowerLOAD(Op, DAG); + assert((!Result.getNode() || +@@ -4153,6 +4154,31 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, + return Chain; + } + ++SDValue SITargetLowering::LowerRETURNADDR(SDValue Op, ++ SelectionDAG &DAG) const { ++ MVT VT = Op.getSimpleValueType(); ++ SDLoc DL(Op); ++ // Checking the depth ++ if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() != 0) ++ return DAG.getConstant(0, DL, VT); ++ ++ MachineFunction &MF = DAG.getMachineFunction(); ++ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); ++ // Check for kernel and shader functions ++ if (Info->isEntryFunction()) ++ return DAG.getConstant(0, DL, VT); ++ ++ MachineFrameInfo &MFI = MF.getFrameInfo(); ++ // There is a call to @llvm.returnaddress in this function ++ MFI.setReturnAddressIsTaken(true); ++ ++ const SIRegisterInfo *TRI = getSubtarget()->getRegisterInfo(); ++ // Get the return address reg and mark it as an implicit live-in ++ unsigned Reg = MF.addLiveIn(TRI->getReturnAddressReg(MF), getRegClassFor(VT)); ++ ++ return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT); ++} ++ + SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG, + SDValue Op, + const SDLoc &DL, +diff --git
a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h +index 60a474f51e5..227a409ebe2 100644 +--- a/lib/Target/AMDGPU/SIISelLowering.h ++++ b/lib/Target/AMDGPU/SIISelLowering.h +@@ -89,7 +89,7 @@ private: + SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; +- ++ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue adjustLoadValueType(unsigned Opcode, MemSDNode *M, + SelectionDAG &DAG, ArrayRef<SDValue> Ops, + bool IsIntrinsic = false) const; +diff --git a/test/CodeGen/AMDGPU/returnaddress.ll b/test/CodeGen/AMDGPU/returnaddress.ll +new file mode 100644 +index 00000000000..9795d96dd07 +--- /dev/null ++++ b/test/CodeGen/AMDGPU/returnaddress.ll +@@ -0,0 +1,64 @@ ++; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s ++ ++; Test with zero frame ++; GCN-LABEL: {{^}}func1 ++; GCN: v_mov_b32_e32 v0, s30 ++; GCN: v_mov_b32_e32 v1, s31 ++; GCN: s_setpc_b64 s[30:31] ++define i8* @func1() nounwind { ++entry: ++ %0 = tail call i8* @llvm.returnaddress(i32 0) ++ ret i8* %0 ++} ++ ++; Test with non-zero frame ++; GCN-LABEL: {{^}}func2 ++; GCN: v_mov_b32_e32 v0, 0 ++; GCN: v_mov_b32_e32 v1, 0 ++; GCN: s_setpc_b64 s[30:31] ++define i8* @func2() nounwind { ++entry: ++ %0 = tail call i8* @llvm.returnaddress(i32 1) ++ ret i8* %0 ++} ++ ++; Test with amdgpu_kernel ++; GCN-LABEL: {{^}}func3 ++; GCN: v_mov_b32_e32 v0, 0 ++; GCN: v_mov_b32_e32 v1, v0 ++define amdgpu_kernel void @func3(i8** %out) nounwind { ++entry: ++ %tmp = tail call i8* @llvm.returnaddress(i32 0) ++ store i8* %tmp, i8** %out, align 4 ++ ret void ++} ++ ++; Test without an entry-block ++; GCN-LABEL: {{^}}func4 ++; GCN: v_mov_b32_e32 v0, 0 ++; GCN: v_mov_b32_e32 v1, v0 ++define amdgpu_kernel void @func4(i8** %out) nounwind { ++entry: ++ %tmp = tail call i8* @llvm.returnaddress(i32 1) ++ br label %exit ++ ++exit: ++ 
store i8* %tmp, i8** %out, align 4 ++ ret void ++} ++ ++; Test ending in unreachable ++; GCN-LABEL: {{^}}func5 ++; GCN: v_mov_b32_e32 v0, 0 ++define void @func5() nounwind { ++entry: ++ %tmp = tail call i8* @llvm.returnaddress(i32 2) ++ br label %unreachable ++ ++unreachable: ++ store volatile i32 0, i32 addrspace(3)* undef, align 4 ++ unreachable ++} ++ ++ ++declare i8* @llvm.returnaddress(i32) nounwind readnone Index: test/CodeGen/AMDGPU/returnaddress.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/returnaddress.ll @@ -0,0 +1,64 @@ +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s + +; Test with zero frame +; GCN-LABEL: {{^}}func1 +; GCN: v_mov_b32_e32 v0, s30 +; GCN: v_mov_b32_e32 v1, s31 +; GCN: s_setpc_b64 s[30:31] +define i8* @func1() nounwind { +entry: + %0 = tail call i8* @llvm.returnaddress(i32 0) + ret i8* %0 +} + +; Test with non-zero frame +; GCN-LABEL: {{^}}func2 +; GCN: v_mov_b32_e32 v0, 0 +; GCN: v_mov_b32_e32 v1, 0 +; GCN: s_setpc_b64 s[30:31] +define i8* @func2() nounwind { +entry: + %0 = tail call i8* @llvm.returnaddress(i32 1) + ret i8* %0 +} + +; Test with amdgpu_kernel +; GCN-LABEL: {{^}}func3 +; GCN: v_mov_b32_e32 v0, 0 +; GCN: v_mov_b32_e32 v1, v0 +define amdgpu_kernel void @func3(i8** %out) nounwind { +entry: + %tmp = tail call i8* @llvm.returnaddress(i32 0) + store i8* %tmp, i8** %out, align 4 + ret void +} + +; Test with use outside the entry-block +; GCN-LABEL: {{^}}func4 +; GCN: v_mov_b32_e32 v0, 0 +; GCN: v_mov_b32_e32 v1, v0 +define amdgpu_kernel void @func4(i8** %out) nounwind { +entry: + %tmp = tail call i8* @llvm.returnaddress(i32 1) + br label %exit + +exit: + store i8* %tmp, i8** %out, align 4 + ret void +} + +; Test ending in unreachable +; GCN-LABEL: {{^}}func5 +; GCN: v_mov_b32_e32 v0, 0 +define void @func5() nounwind { +entry: + %tmp = tail call i8* @llvm.returnaddress(i32 2) + br label %unreachable + +unreachable: + 
store volatile i32 0, i32 addrspace(3)* undef, align 4 + unreachable +} + + +declare i8* @llvm.returnaddress(i32) nounwind readnone