Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16677,7 +16677,7 @@
           if ((Count.getOpcode() == ISD::CTTZ ||
                Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
               N0 == Count.getOperand(0) &&
-              (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
+              (!LegalOperations && TLI.isOperationLegal(ISD::CTTZ, VT)))
             return DAG.getNode(ISD::CTTZ, DL, VT, N0);
           // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
           // legal, combine to just ctlz.
Index: test/CodeGen/AMDGPU/cttz_zero_undef.ll
===================================================================
--- test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -76,3 +76,30 @@
   store <4 x i32> %cttz, <4 x i32> addrspace(1)* %out, align 16
   ret void
 }
+
+; Stores select(%val != 0, cttz_zero_undef(%val), 32) for a kernel-argument %val.
+; FUNC-LABEL: {{^}}s_cttz_zero_undef_i32_with_select:
+; SI: s_ff1_i32_b32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
+; EG: FFBL_INT
+define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+  %cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
+  %cttz_ret = icmp ne i32 %val, 0
+  %ret = select i1 %cttz_ret, i32 %cttz, i32 32
+  store i32 %ret, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; Same select pattern as above, with %val loaded from memory instead of passed as an argument.
+; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32_with_select:
+; SI: v_ffbl_b32_e32
+; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
+define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+  %val = load i32, i32 addrspace(1)* %arrayidx, align 1
+  %cttz = tail call i32 @llvm.cttz.i32(i32 %val, i1 true) nounwind readnone
+  %cttz_ret = icmp ne i32 %val, 0
+  %ret = select i1 %cttz_ret, i32 %cttz, i32 32
+  store i32 %ret, i32 addrspace(1)* %out, align 4
+  ret void
+}
+