Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -231,18 +231,27 @@ enum NodeType : unsigned { // AMDIL ISD Opcodes FIRST_NUMBER = ISD::BUILTIN_OP_END, - CALL, // Function call based on a single integer UMUL, // 32bit unsigned multiplication BRANCH_COND, // End AMDIL ISD Opcodes + // Function call. + CALL, + // Masked control flow nodes. IF, ELSE, LOOP, + // A uniform kernel return that terminates the wavefront. ENDPGM, - RETURN, + + // Return from a shader part's epilog code. + RETURN_EPILOG, + + // Return with values from a non-entry function. + RET_FLAG, + DWORDADDR, FRACT, Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3408,7 +3408,6 @@ switch ((AMDGPUISD::NodeType)Opcode) { case AMDGPUISD::FIRST_NUMBER: break; // AMDIL DAG nodes - NODE_NAME_CASE(CALL); NODE_NAME_CASE(UMUL); NODE_NAME_CASE(BRANCH_COND); @@ -3416,8 +3415,10 @@ NODE_NAME_CASE(IF) NODE_NAME_CASE(ELSE) NODE_NAME_CASE(LOOP) + NODE_NAME_CASE(CALL) + NODE_NAME_CASE(RET_FLAG) + NODE_NAME_CASE(RETURN_EPILOG) NODE_NAME_CASE(ENDPGM) - NODE_NAME_CASE(RETURN) NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) NODE_NAME_CASE(SETCC) Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -362,5 +362,9 @@ def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone, [SDNPHasChain, SDNPOptInGlue]>; -def AMDGPUreturn : SDNode<"AMDGPUISD::RETURN", SDTNone, +def AMDGPUreturn_epilog : SDNode<"AMDGPUISD::RETURN_EPILOG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + +def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue, 
SDNPVariadic] +>; Index: lib/Target/AMDGPU/AMDGPUMCInstLower.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -195,8 +195,9 @@ ++I; } } else { - // We don't want SI_MASK_BRANCH/SI_RETURN encoded. They are placeholder - // terminator instructions and should only be printed as comments. + // We don't want SI_MASK_BRANCH/SI_RETURN_EPILOG encoded. They are + // placeholder terminator instructions and should only be printed as + // comments. if (MI->getOpcode() == AMDGPU::SI_MASK_BRANCH) { if (isVerbose()) { SmallVector<char, 16> BBStr; @@ -212,9 +213,9 @@ return; } - if (MI->getOpcode() == AMDGPU::SI_RETURN) { + if (MI->getOpcode() == AMDGPU::SI_RETURN_EPILOG) { if (isVerbose()) - OutStreamer->emitRawComment(" return"); + OutStreamer->emitRawComment(" return to shader part epilog"); return; } Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1342,7 +1342,7 @@ if (Flag.getNode()) RetOps.push_back(Flag); - unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN; + unsigned Opc = Info->returnsVoid() ? AMDGPUISD::ENDPGM : AMDGPUISD::RETURN_EPILOG; return DAG.getNode(Opc, DL, MVT::Other, RetOps); } Index: lib/Target/AMDGPU/SIInsertSkips.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertSkips.cpp +++ lib/Target/AMDGPU/SIInsertSkips.cpp @@ -318,14 +318,14 @@ MI.eraseFromParent(); break; - case AMDGPU::SI_RETURN: + case AMDGPU::SI_RETURN_EPILOG: // FIXME: Should move somewhere else assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid()); // Graphics shaders returning non-void shouldn't contain S_ENDPGM, // because external bytecode will be appended at the end. if (BI != --MF.end() || I != MBB.getFirstTerminator()) { - // SI_RETURN is not the last instruction. 
Add an empty block at + // SI_RETURN_EPILOG is not the last instruction. Add an empty block at // the end and jump there. if (!EmptyMBBAtEnd) { EmptyMBBAtEnd = MF.CreateMachineBasicBlock(); Index: lib/Target/AMDGPU/SIInsertWaits.cpp =================================================================== --- lib/Target/AMDGPU/SIInsertWaits.cpp +++ lib/Target/AMDGPU/SIInsertWaits.cpp @@ -648,7 +648,7 @@ handleSendMsg(MBB, I); if (I->getOpcode() == AMDGPU::S_ENDPGM || - I->getOpcode() == AMDGPU::SI_RETURN) + I->getOpcode() == AMDGPU::SI_RETURN_EPILOG) EndPgmBlocks.push_back(&MBB); } @@ -679,7 +679,7 @@ // FIXME: It would be better to insert this before a waitcnt if any. if ((I->getOpcode() == AMDGPU::S_ENDPGM || - I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) { + I->getOpcode() == AMDGPU::SI_RETURN_EPILOG) && !SeenDCacheWB) { Changes = true; BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB)); } Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -271,8 +271,9 @@ let isReMaterializable = 1; } -def SI_RETURN : SPseudoInstSI < - (outs), (ins variable_ops), [(AMDGPUreturn)]> { +// Return for returning shaders to a shader variant epilog. +def SI_RETURN_EPILOG : SPseudoInstSI < + (outs), (ins variable_ops), [(AMDGPUreturn_epilog)]> { let isTerminator = 1; let isBarrier = 1; let isReturn = 1; Index: test/CodeGen/AMDGPU/insert-waits-exp.mir =================================================================== --- test/CodeGen/AMDGPU/insert-waits-exp.mir +++ test/CodeGen/AMDGPU/insert-waits-exp.mir @@ -58,6 +58,6 @@ %vgpr1 = V_MOV_B32_e32 1065353216, implicit %exec %vgpr2 = V_MOV_B32_e32 1073741824, implicit %exec %vgpr3 = V_MOV_B32_e32 1082130432, implicit %exec - SI_RETURN killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3 + SI_RETURN_EPILOG killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3 ... 
Index: test/CodeGen/AMDGPU/scalar-store-cache-flush.mir =================================================================== --- test/CodeGen/AMDGPU/scalar-store-cache-flush.mir +++ test/CodeGen/AMDGPU/scalar-store-cache-flush.mir @@ -169,5 +169,5 @@ body: | bb.0: S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 - SI_RETURN undef %vgpr0 + SI_RETURN_EPILOG undef %vgpr0 ...