Summary (leading text before the first "Index:" header is skipped by patch tools):
  * DivergenceAnalysis.h: document that isDivergent()/isUniform() describe a
    value at its definition only.
  * StructurizeCFG.cpp: add the hidden -structurizecfg-skip-uniform-regions
    option and query divergence of the branch instruction itself instead of
    its condition.
  * Tests: update control-flow-optnone.ll and add uniform-regions.ll.
NOTE(review): leading whitespace of context lines was reconstructed from a
whitespace-collapsed copy of this patch; verify against the tree (or apply
with whitespace-insensitive matching) before committing.

Index: include/llvm/Analysis/DivergenceAnalysis.h
===================================================================
--- include/llvm/Analysis/DivergenceAnalysis.h
+++ include/llvm/Analysis/DivergenceAnalysis.h
@@ -35,10 +35,16 @@
   // Print all divergent branches in the function.
   void print(raw_ostream &OS, const Module *) const override;
 
-  // Returns true if V is divergent.
+  // Returns true if V is divergent at its definition.
+  //
+  // Even if this function returns false, V may still be divergent when used
+  // in a different basic block.
   bool isDivergent(const Value *V) const { return DivergentValues.count(V); }
 
   // Returns true if V is uniform/non-divergent.
+  //
+  // Even if this function returns true, V may still be divergent when used
+  // in a different basic block.
   bool isUniform(const Value *V) const { return !isDivergent(V); }
 
 private:
Index: lib/Transforms/Scalar/StructurizeCFG.cpp
===================================================================
--- lib/Transforms/Scalar/StructurizeCFG.cpp
+++ lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -55,6 +55,12 @@
 
 namespace {
 
+static cl::opt<bool> ForceSkipUniformRegions(
+  "structurizecfg-skip-uniform-regions",
+  cl::Hidden,
+  cl::desc("Force the StructurizeCFG pass to skip uniform regions"),
+  cl::init(false));
+
 // Definition of the complex types used in this pass.
 
 using BBValuePair = std::pair<BasicBlock *, Value *>;
@@ -243,7 +249,8 @@
   static char ID;
 
   explicit StructurizeCFG(bool SkipUniformRegions = false)
-      : RegionPass(ID), SkipUniformRegions(SkipUniformRegions) {
+      : RegionPass(ID),
+        SkipUniformRegions(SkipUniformRegions || ForceSkipUniformRegions) {
     initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
   }
 
@@ -891,7 +898,7 @@
       if (!Br || !Br->isConditional())
         continue;
 
-      if (!DA.isUniform(Br->getCondition()))
+      if (!DA.isUniform(Br))
         return false;
       DEBUG(dbgs() << "BB: " << BB->getName() << " has uniform terminator\n");
     }
Index: test/CodeGen/AMDGPU/control-flow-optnone.ll
===================================================================
--- test/CodeGen/AMDGPU/control-flow-optnone.ll
+++ test/CodeGen/AMDGPU/control-flow-optnone.ll
@@ -15,8 +15,8 @@
 ; GCN: s_mov_b64 exec
 
 ; GCN: s_or_b64 exec, exec
-; GCN: v_cmp_eq_u32
-; GCN: s_cbranch_vccnz
+; GCN: s_cmp_eq_u32
+; GCN: s_cbranch_scc1
 ; GCN-NEXT: s_branch
 define amdgpu_kernel void @copytoreg_divergent_brcond(i32 %arg, i32 %arg1, i32 %arg2) #0 {
 bb:
Index: test/Transforms/StructurizeCFG/uniform-regions.ll
===================================================================
--- /dev/null
+++ test/Transforms/StructurizeCFG/uniform-regions.ll
@@ -0,0 +1,49 @@
+; RUN: opt -mtriple=amdgcn-- -S -o - -structurizecfg -structurizecfg-skip-uniform-regions < %s | FileCheck %s
+
+; CHECK-LABEL: @uniform(
+; CHECK: entry:
+; CHECK: br i1 %cc, label %if, label %end, !structurizecfg.uniform !0
+; CHECK: if:
+; CHECK: br label %end, !structurizecfg.uniform !0
+define amdgpu_cs void @uniform(i32 inreg %v) {
+entry:
+  %cc = icmp eq i32 %v, 0
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @nonuniform(
+; CHECK-NOT: !structurizecfg
+define amdgpu_cs void @nonuniform(i32 addrspace(2)* %ptr) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [0, %entry], [%i.inc, %end.loop]
+  %cc = icmp ult i32 %i, 4
+  br i1 %cc, label %mid.loop, label %for.end
+
+mid.loop:
+  %gep = getelementptr i32, i32 addrspace(2)* %ptr, i32 %i
+  %v = load i32, i32 addrspace(2)* %gep, align 4
+  %cc2 = icmp eq i32 %v, 0
+  br i1 %cc2, label %end.loop, label %for.end
+
+end.loop:
+  %i.inc = add i32 %i, 1
+  br label %for.body
+
+for.end:
+  br i1 %cc, label %if, label %end
+
+if:
+  br label %end
+
+end:
+  ret void
+}