Index: llvm/trunk/include/llvm/IR/Instructions.h =================================================================== --- llvm/trunk/include/llvm/IR/Instructions.h +++ llvm/trunk/include/llvm/IR/Instructions.h @@ -2693,6 +2693,11 @@ /// same value, return the value, otherwise return null. Value *hasConstantValue() const; + /// hasConstantOrUndefValue - Whether the specified PHI node always merges + /// together the same value, assuming undefs are equal to a unique + /// non-undef value. + bool hasConstantOrUndefValue() const; + /// Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const Instruction *I) { return I->getOpcode() == Instruction::PHI; Index: llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp =================================================================== --- llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp +++ llvm/trunk/lib/Analysis/DivergenceAnalysis.cpp @@ -146,7 +146,7 @@ for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) { // A PHINode is uniform if it returns the same value no matter which path is // taken. - if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second) + if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second) Worklist.push_back(&*I); } Index: llvm/trunk/lib/IR/Instructions.cpp =================================================================== --- llvm/trunk/lib/IR/Instructions.cpp +++ llvm/trunk/lib/IR/Instructions.cpp @@ -154,6 +154,24 @@ return ConstantValue; } +/// hasConstantOrUndefValue - Whether the specified PHI node always merges +/// together the same value, assuming that undefs result in the same value as +/// non-undefs. +/// Unlike \ref hasConstantValue, this does not return a value because the +/// unique non-undef incoming value need not dominate the PHI node. 
+bool PHINode::hasConstantOrUndefValue() const { + Value *ConstantValue = nullptr; + for (unsigned i = 0, e = getNumIncomingValues(); i != e; ++i) { + Value *Incoming = getIncomingValue(i); + if (Incoming != this && !isa<UndefValue>(Incoming)) { + if (ConstantValue && ConstantValue != Incoming) + return false; + ConstantValue = Incoming; + } + } + return true; +} + //===----------------------------------------------------------------------===// // LandingPadInst Implementation //===----------------------------------------------------------------------===// Index: llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll =================================================================== --- llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll +++ llvm/trunk/test/Analysis/DivergenceAnalysis/AMDGPU/phi-undef.ll @@ -0,0 +1,28 @@ +; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s + +; CHECK-LABEL: 'test1': +; CHECK-NEXT: DIVERGENT: i32 %bound +; CHECK-NEXT: DIVERGENT: %break = icmp sge i32 %counter, %bound +; CHECK-NEXT: DIVERGENT: br i1 %break, label %footer, label %body +; CHECK-NEXT: DIVERGENT: br i1 %break, label %end, label %header +; Note: %counter is not divergent! 
+define amdgpu_ps void @test1(i32 %bound) { +entry: + br label %header + +header: + %counter = phi i32 [ 0, %entry ], [ %counter.footer, %footer ] + %break = icmp sge i32 %counter, %bound + br i1 %break, label %footer, label %body + +body: + %counter.next = add i32 %counter, 1 + br label %footer + +footer: + %counter.footer = phi i32 [ %counter.next, %body ], [ undef, %header ] + br i1 %break, label %end, label %header + +end: + ret void +} Index: llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll +++ llvm/trunk/test/CodeGen/AMDGPU/branch-uniformity.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=amdgcn-- < %s | FileCheck %s + +; The branch instruction in LOOP49 has a uniform condition, but PHI instructions +; introduced by the structurizecfg pass previously caused a false divergence +; which ended up in an assertion (or incorrect code) because +; SIAnnotateControlFlow and structurizecfg had different ideas about which +; branches are uniform. 
+; +; CHECK-LABEL: {{^}}main: +; CHECK: ; %LOOP49 +; CHECK: v_cmp_ne_i32_e32 vcc, +; CHECK: s_cbranch_vccnz +; CHECK: ; %ENDIF53 +define amdgpu_vs float @main(i32 %in) { +main_body: + %cmp = mul i32 %in, 2 + br label %LOOP + +LOOP: ; preds = %ENDLOOP48, %main_body + %counter = phi i32 [ 0, %main_body ], [ %counter.next, %ENDLOOP48 ] + %v.LOOP = phi i32 [ 0, %main_body ], [ %v.ENDLOOP48, %ENDLOOP48 ] + %tmp7 = icmp slt i32 %cmp, %counter + br i1 %tmp7, label %IF, label %LOOP49 + +IF: ; preds = %LOOP + %r = bitcast i32 %v.LOOP to float + ret float %r + +LOOP49: ; preds = %LOOP + %tmp8 = icmp ne i32 %counter, 0 + br i1 %tmp8, label %ENDLOOP48, label %ENDIF53 + +ENDLOOP48: ; preds = %ENDIF53, %LOOP49 + %v.ENDLOOP48 = phi i32 [ %v.LOOP, %LOOP49 ], [ %v.ENDIF53, %ENDIF53 ] + %counter.next = add i32 %counter, 1 + br label %LOOP + +ENDIF53: ; preds = %LOOP49 + %v.ENDIF53 = add i32 %v.LOOP, %counter + br label %ENDLOOP48 +}