Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -775,14 +775,6 @@
     return RC;
   }
 
-  /// Allows target to decide about the register class of the
-  /// specific value that is live outside the defining block.
-  /// Returns true if the value needs uniform register class.
-  virtual bool requiresUniformRegister(MachineFunction &MF,
-                                       const Value *) const {
-    return false;
-  }
-
   /// Return the 'representative' register class for the specified value
   /// type.
   ///
Index: llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -387,8 +387,7 @@
 }
 
 unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
-  return CreateRegs(V->getType(), DA && DA->isDivergent(V) &&
-                    !TLI->requiresUniformRegister(*MF, V));
+  return CreateRegs(V->getType(), DA && DA->isDivergent(V));
 }
 
 /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
Index: llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -270,5 +270,13 @@
 def : SourceOfDivergence;
 def : SourceOfDivergence;
 
+// The dummy boolean output is divergent from the IR's perspective,
+// but the mask results are uniform. These produce a divergent and
+// uniform result, so the returned struct is collectively divergent.
+// isAlwaysUniform can override the extract of the uniform component.
+def : SourceOfDivergence<int_amdgcn_if>;
+def : SourceOfDivergence<int_amdgcn_else>;
+def : SourceOfDivergence<int_amdgcn_loop>;
+
 foreach intr = AMDGPUImageDimAtomicIntrinsics in
 def : SourceOfDivergence<intr>;
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -706,6 +706,7 @@
     case Intrinsic::amdgcn_readlane:
     case Intrinsic::amdgcn_icmp:
     case Intrinsic::amdgcn_fcmp:
+    case Intrinsic::amdgcn_if_break:
       return true;
     }
   }
@@ -714,14 +715,28 @@
   if (!ExtValue)
     return false;
 
-  if (const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0))) {
-    // If we have inline asm returning mixed SGPR and VGPR results, we inferred
-    // divergent for the overall struct return. We need to override it in the
-    // case we're extracting an SGPR component here.
-    if (isa<InlineAsm>(CI->getCalledValue()))
-      return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+  const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0));
+  if (!CI)
+    return false;
+
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if:
+    case Intrinsic::amdgcn_else: {
+      ArrayRef<unsigned> Indices = ExtValue->getIndices();
+      return Indices.size() == 1 && Indices[0] == 1;
+    }
+    }
   }
 
+  // If we have inline asm returning mixed SGPR and VGPR results, we inferred
+  // divergent for the overall struct return. We need to override it in the
+  // case we're extracting an SGPR component here.
+  if (isa<InlineAsm>(CI->getCalledValue()))
+    return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+
   return false;
 }
 
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10889,97 +10889,3 @@
   return RC;
 }
-
-static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited) {
-  if (!isa<Instruction>(V))
-    return false;
-  if (!Visited.insert(V).second)
-    return false;
-  bool Result = false;
-  for (auto U : V->users()) {
-    if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(U)) {
-      if (V == U->getOperand(1)) {
-        switch (Intrinsic->getIntrinsicID()) {
-        default:
-          Result = false;
-          break;
-        case Intrinsic::amdgcn_if_break:
-        case Intrinsic::amdgcn_if:
-        case Intrinsic::amdgcn_else:
-          Result = true;
-          break;
-        }
-      }
-      if (V == U->getOperand(0)) {
-        switch (Intrinsic->getIntrinsicID()) {
-        default:
-          Result = false;
-          break;
-        case Intrinsic::amdgcn_end_cf:
-        case Intrinsic::amdgcn_loop:
-          Result = true;
-          break;
-        }
-      }
-    } else {
-      Result = hasCFUser(U, Visited);
-    }
-    if (Result)
-      break;
-  }
-  return Result;
-}
-
-bool SITargetLowering::requiresUniformRegister(MachineFunction &MF,
-                                               const Value *V) const {
-  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
-    switch (Intrinsic->getIntrinsicID()) {
-    default:
-      return false;
-    case Intrinsic::amdgcn_if_break:
-      return true;
-    }
-  }
-  if (const ExtractValueInst *ExtValue = dyn_cast<ExtractValueInst>(V)) {
-    if (const IntrinsicInst *Intrinsic =
-            dyn_cast<IntrinsicInst>(ExtValue->getOperand(0))) {
-      switch (Intrinsic->getIntrinsicID()) {
-      default:
-        return false;
-      case Intrinsic::amdgcn_if:
-      case Intrinsic::amdgcn_else: {
-        ArrayRef<unsigned> Indices = ExtValue->getIndices();
-        if (Indices.size() == 1 && Indices[0] == 1) {
-          return true;
-        }
-      }
-      }
-    }
-  }
-  if (const CallInst *CI = dyn_cast<CallInst>(V)) {
-    if (isa<InlineAsm>(CI->getCalledValue())) {
-      const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
-      ImmutableCallSite CS(CI);
-      TargetLowering::AsmOperandInfoVector TargetConstraints = ParseConstraints(
-          MF.getDataLayout(), Subtarget->getRegisterInfo(), CS);
-      for (auto &TC : TargetConstraints) {
-        if (TC.Type == InlineAsm::isOutput) {
-          ComputeConstraintToUse(TC, SDValue());
-          unsigned AssignedReg;
-          const TargetRegisterClass *RC;
-          std::tie(AssignedReg, RC) = getRegForInlineAsmConstraint(
-              SIRI, TC.ConstraintCode, TC.ConstraintVT);
-          if (RC) {
-            MachineRegisterInfo &MRI = MF.getRegInfo();
-            if (AssignedReg != 0 && SIRI->isSGPRReg(MRI, AssignedReg))
-              return true;
-            else if (SIRI->isSGPRClass(RC))
-              return true;
-          }
-        }
-      }
-    }
-  }
-  SmallPtrSet<const Value *, 16> Visited;
-  return hasCFUser(V, Visited);
-}
Index: llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
@@ -0,0 +1,102 @@
+; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; Tests control flow intrinsics that should be treated as uniform
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
+; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: ret void
+define amdgpu_ps void @test_if_break(i32 %arg0, i64 inreg %saved) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %break = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %cond, i64 %saved)
+  store volatile i64 %break, i64 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
+; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
+; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define void @test_if(i32 %arg0) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; The result should still be treated as divergent, even with a uniform source.
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define amdgpu_ps void @test_if_uniform(i32 inreg %arg0) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
+; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
+define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
+entry:
+  %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
+  %loop.ext = zext i1 %loop to i32
+  store volatile i32 %loop.ext, i32 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
+; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0
+; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
+define amdgpu_ps void @test_else(i64 inreg %mask) {
+entry:
+  %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+  %else.bool = extractvalue { i1, i64 } %else, 0
+  %else.mask = extractvalue { i1, i64 } %else, 1
+  %else.bool.ext = zext i1 %else.bool to i32
+  store volatile i32 %else.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %else.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; This case is probably always broken
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
+; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define void @test_else_divergent_mask(i64 %mask) {
+entry:
+  %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #0
+declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #0
+declare i64 @llvm.amdgcn.if.break.i64.i64(i1, i64) #1
+declare i1 @llvm.amdgcn.loop.i64(i64) #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }
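
For context, a minimal hand-written IR sketch (not from the patch; function, block, and value names are illustrative only) of the structurized control-flow pattern this change targets. The tests above only store the mask component; in practice the i64 mask result of llvm.amdgcn.if is an exec-mask value consumed by llvm.amdgcn.end_cf, so with the isAlwaysUniform change the extract of field 1 (%if.mask) is reported uniform and FunctionLoweringInfo::CreateRegs assigns it an SGPR register class, while field 0 (%if.bool) stays divergent and steers the branch.

; Sketch only: %if.mask feeds llvm.amdgcn.end_cf and must stay in a uniform
; (SGPR) register; %if.bool is the per-lane condition used for the branch.
define void @sketch(i32 %arg0, i32 addrspace(1)* %out) {
entry:
  %cond = icmp eq i32 %arg0, 0
  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
  %if.bool = extractvalue { i1, i64 } %if, 0
  %if.mask = extractvalue { i1, i64 } %if, 1
  br i1 %if.bool, label %then, label %end

then:
  store i32 0, i32 addrspace(1)* %out
  br label %end

end:
  call void @llvm.amdgcn.end_cf.i64(i64 %if.mask)
  ret void
}

declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #0
declare void @llvm.amdgcn.end_cf.i64(i64) #0

attributes #0 = { convergent nounwind }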