Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -62,6 +62,7 @@
 
   unsigned getMaxInterleaveFactor(unsigned VF);
   int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index);
+  bool isSourceOfDivergence(const Value *V) const;
 };
 
 } // end namespace llvm
Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -91,3 +91,67 @@
     return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
   }
 }
+
+static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
+                                          const IntrinsicInst *I) {
+  switch (I->getIntrinsicID()) {
+  default:
+    return false;
+
+  case Intrinsic::amdgcn_v_interp_p1:
+  case Intrinsic::amdgcn_v_interp_p2:
+  case Intrinsic::amdgcn_v_mbcnt_hi_u32_b32:
+  case Intrinsic::amdgcn_v_mbcnt_lo_u32_b32:
+  case Intrinsic::r600_read_tidig_x:
+  case Intrinsic::r600_read_tidig_y:
+  case Intrinsic::r600_read_tidig_z:
+    return true;
+  }
+}
+
+static bool isArgPassedInSGPR(const Argument *A) {
+  const Function *F = A->getParent();
+  unsigned ShaderType = AMDGPU::getShaderType(*F);
+
+  // Arguments to compute shaders are never a source of divergence.
+  if (ShaderType == ShaderType::COMPUTE)
+    return true;
+
+  // For non-compute shaders, the inreg attribute is used to mark inputs,
+  // which are pre-loaded into SGPRs.
+  if (F->getAttributes().hasAttribute(A->getArgNo(), Attribute::InReg))
+    return true;
+
+  // For non-compute shaders, 32-bit values are pre-loaded into VGPRs; all
+  // other value types use SGPRs.
+  return !A->getType()->isIntegerTy(32) && !A->getType()->isFloatTy();
+}
+
+///
+/// \returns true if the result of the value could potentially be
+/// different across workitems in a wavefront.
+bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const {
+
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return !isArgPassedInSGPR(A);
+
+  // Loads from the private address space are divergent, because threads
+  // can execute the load instruction with the same inputs and get different
+  // results.
+  //
+  // All other loads are not divergent, because if threads issue loads with
+  // the same arguments, they will always get the same result.
+  if (const LoadInst *Load = dyn_cast<LoadInst>(V))
+    return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
+    const TargetMachine &TM = getTLI()->getTargetMachine();
+    return isIntrinsicSourceOfDivergence(TM.getIntrinsicInfo(), Intrinsic);
+  }
+
+  // Assume all function calls are a source of divergence.
+  if (isa<CallInst>(V))
+    return true;
+
+  return false;
+}
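
For context: this hook is meant to be queried through TargetTransformInfo by a divergence analysis pass. The sketch below is illustrative only; it is not part of this patch and not the upstream DivergenceAnalysis implementation, and the function name collectDivergenceSeeds and the Seeds vector are invented for the example. It shows how a consumer could use TTI::isSourceOfDivergence to gather the initial set of divergent values before propagating divergence to dependent users:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include <vector>

using namespace llvm;

// Collect the values that the target reports as inherently divergent.
// A real analysis would then propagate divergence from these seeds to
// their data- and control-dependent users.
static std::vector<const Value *>
collectDivergenceSeeds(const Function &F, const TargetTransformInfo &TTI) {
  std::vector<const Value *> Seeds;

  // Function arguments: on AMDGPU, divergent unless passed in an SGPR.
  for (const Argument &A : F.args())
    if (TTI.isSourceOfDivergence(&A))
      Seeds.push_back(&A);

  // Private-address-space loads, workitem-id intrinsics, and calls are
  // classified by the target hook added in this patch.
  for (const BasicBlock &BB : F)
    for (const Instruction &I : BB)
      if (TTI.isSourceOfDivergence(&I))
        Seeds.push_back(&I);

  return Seeds;
}

Values not reachable from these seeds can then be treated as uniform, which is why the conservative answers above (e.g. treating every call as divergent) are safe defaults.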