Index: llvm/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/lib/Analysis/InstructionSimplify.cpp +++ llvm/lib/Analysis/InstructionSimplify.cpp @@ -4095,6 +4095,11 @@ /// See if we can fold the given phi. If not, returns null. static Value *SimplifyPHINode(PHINode *PN, const SimplifyQuery &Q) { + // LCSSA PHIs only have one incoming value. Return nullptr here to avoid + // simplifying them away, since they are necessary to hold out-of-loop + // dependencies. + if (PN->getNumIncomingValues() < 2) + return nullptr; // If all of the PHI's incoming values are the same then replace the PHI node // with the common value. Value *CommonValue = nullptr; Index: llvm/test/CodeGen/AMDGPU/lcssa-phi.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/lcssa-phi.mir @@ -0,0 +1,31 @@ +# RUN: llc -march=amdgcn -run-pass early-cse -o - %s | FileCheck %s +# Check that LCSSA PHI was not eliminated +# CHECK: %counter.lcssa = phi i32 [ %counter, %loop ] + +--- | + define amdgpu_kernel void @lcssa-phi(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + entry: + %tid = call i32 @llvm.amdgcn.workitem.id.x() + br label %loop + + loop: ; preds = %flow, %entry + %counter = phi i32 [ 0, %entry ], [ %inc, %flow ] + %cond = icmp ugt i32 %counter, %tid + br i1 %cond, label %endloop, label %flow + + flow: ; preds = %loop + %inc = add i32 %counter, 1 + br label %loop + + endloop: ; preds = %loop + %counter.lcssa = phi i32 [ %counter, %loop ] + store i32 %counter.lcssa, i32 addrspace(1)* %out + ret void + } + + ; Function Attrs: nounwind readnone speculatable + declare i32 @llvm.amdgcn.workitem.id.x() #0 + + attributes #0 = { nounwind readnone speculatable } + +...