Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -469,7 +469,7 @@ } NewOps.push_back(User->getOperand(i)); } - TLO.DAG.UpdateNodeOperands(User, NewOps); + User = TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -6485,7 +6485,7 @@ Node->getOperand(i)), 0)); } - DAG.UpdateNodeOperands(Node, Ops); + Node = DAG.UpdateNodeOperands(Node, Ops); return Node; } Index: test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll @@ -0,0 +1,85 @@ +; RUN: (ulimit -t 5; llc -march=amdgcn < %s | FileCheck -check-prefix=GCN %s) + +; Check we can compile this test without an infinite loop in +; TLI.SimplifyDemandedBits() due to failure to use the return value +; of TLO.DAG.UpdateNodeOperands() + +; On success, the body of foo() is empty save for an s_endpgm. +; Check for it. 
+ +@0 = external unnamed_addr addrspace(3) global [462 x float], align 4 + +declare i32 @llvm.amdgcn.workitem.id.y() +declare i32 @llvm.amdgcn.workitem.id.x() +declare float @llvm.fmuladd.f32(float, float, float) + +; GCN: s_endpgm +define amdgpu_kernel void @foo(float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture, float) local_unnamed_addr !reqd_work_group_size !{i32 8, i32 16, i32 1} { + %5 = tail call i32 @llvm.amdgcn.workitem.id.y() + %6 = tail call i32 @llvm.amdgcn.workitem.id.x() + %7 = and i32 %5, 15 + %8 = mul nuw nsw i32 %7, 21 + %9 = sub i32 %8, 0 + %10 = add i32 %9, 0 + %11 = add i32 %10, 0 + %12 = getelementptr inbounds [462 x float], [462 x float] addrspace(3)* @0, i32 0, i32 0 + br label %14 + +;