Index: llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -270,5 +270,13 @@
 def : SourceOfDivergence;
 def : SourceOfDivergence;
 
+// The dummy boolean output is divergent from the IR's perspective,
+// but the mask results are uniform. These produce a divergent and
+// uniform result, so the returned struct is collectively divergent.
+// isAlwaysUniform can override the extract of the uniform component.
+def : SourceOfDivergence<int_amdgcn_if>;
+def : SourceOfDivergence<int_amdgcn_else>;
+def : SourceOfDivergence<int_amdgcn_loop>;
+
 foreach intr = AMDGPUImageDimAtomicIntrinsics in
 def : SourceOfDivergence<intr>;
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -706,6 +706,7 @@
     case Intrinsic::amdgcn_readlane:
     case Intrinsic::amdgcn_icmp:
     case Intrinsic::amdgcn_fcmp:
+    case Intrinsic::amdgcn_if_break:
       return true;
     }
   }
@@ -714,14 +715,28 @@
   if (!ExtValue)
     return false;
 
-  if (const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0))) {
-    // If we have inline asm returning mixed SGPR and VGPR results, we inferred
-    // divergent for the overall struct return. We need to override it in the
-    // case we're extracting an SGPR component here.
-    if (isa<InlineAsm>(CI->getCalledValue()))
-      return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+  const CallInst *CI = dyn_cast<CallInst>(ExtValue->getOperand(0));
+  if (!CI)
+    return false;
+
+  if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(CI)) {
+    switch (Intrinsic->getIntrinsicID()) {
+    default:
+      return false;
+    case Intrinsic::amdgcn_if:
+    case Intrinsic::amdgcn_else: {
+      ArrayRef<unsigned> Indices = ExtValue->getIndices();
+      return Indices.size() == 1 && Indices[0] == 1;
+    }
+    }
   }
 
+  // If we have inline asm returning mixed SGPR and VGPR results, we inferred
+  // divergent for the overall struct return. We need to override it in the
+  // case we're extracting an SGPR component here.
+  if (isa<InlineAsm>(CI->getCalledValue()))
+    return !isInlineAsmSourceOfDivergence(CI, ExtValue->getIndices());
+
   return false;
 }
Index: llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/DivergenceAnalysis/AMDGPU/control-flow-intrinsics.ll
@@ -0,0 +1,102 @@
+; RUN: opt -mtriple=amdgcn-mesa-mesa3d -analyze -divergence -use-gpu-divergence-analysis %s | FileCheck %s
+
+; Tests control flow intrinsics that should be treated as uniform
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_break':
+; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: ret void
+define amdgpu_ps void @test_if_break(i32 %arg0, i64 inreg %saved) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %break = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %cond, i64 %saved)
+  store volatile i64 %break, i64 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if':
+; CHECK: DIVERGENT: %cond = icmp eq i32 %arg0, 0
+; CHECK-NEXT: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define void @test_if(i32 %arg0) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; The result should still be treated as divergent, even with a uniform source.
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_if_uniform':
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
+define amdgpu_ps void @test_if_uniform(i32 inreg %arg0) {
+entry:
+  %cond = icmp eq i32 %arg0, 0
+  %if = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %cond)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_loop_uniform':
+; CHECK: DIVERGENT: %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
+define amdgpu_ps void @test_loop_uniform(i64 inreg %mask) {
+entry:
+  %loop = call i1 @llvm.amdgcn.loop.i64(i64 %mask)
+  %loop.ext = zext i1 %loop to i32
+  store volatile i32 %loop.ext, i32 addrspace(1)* undef
+  ret void
+}
+
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else':
+; CHECK: DIVERGENT: %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+; CHECK: DIVERGENT: %else.bool = extractvalue { i1, i64 } %else, 0
+; CHECK: {{^[ \t]+}}%else.mask = extractvalue { i1, i64 } %else, 1
+define amdgpu_ps void @test_else(i64 inreg %mask) {
+entry:
+  %else = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+  %else.bool = extractvalue { i1, i64 } %else, 0
+  %else.mask = extractvalue { i1, i64 } %else, 1
+  %else.bool.ext = zext i1 %else.bool to i32
+  store volatile i32 %else.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %else.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+; This case is probably always broken
+; CHECK: Printing analysis 'Legacy Divergence Analysis' for function 'test_else_divergent_mask':
+; CHECK: DIVERGENT: %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+; CHECK-NEXT: DIVERGENT: %if.bool = extractvalue { i1, i64 } %if, 0
+; CHECK-NOT: DIVERGENT
+; CHECK: DIVERGENT: %if.bool.ext = zext i1 %if.bool to i32
define void @test_else_divergent_mask(i64 %mask) {
+entry:
+  %if = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %mask)
+  %if.bool = extractvalue { i1, i64 } %if, 0
+  %if.mask = extractvalue { i1, i64 } %if, 1
+  %if.bool.ext = zext i1 %if.bool to i32
+  store volatile i32 %if.bool.ext, i32 addrspace(1)* undef
+  store volatile i64 %if.mask, i64 addrspace(1)* undef
+  ret void
+}
+
+declare { i1, i64 } @llvm.amdgcn.if.i64(i1) #0
+declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64) #0
+declare i64 @llvm.amdgcn.if.break.i64.i64(i1, i64) #1
+declare i1 @llvm.amdgcn.loop.i64(i64) #1
+
+attributes #0 = { convergent nounwind }
+attributes #1 = { convergent nounwind readnone }