Index: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2208,9 +2208,8 @@ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); - SDValue ZeroOrOne = isCtlzOpc(Op.getOpcode()) ? Zero : One; SDValue HiOrLo = isCtlzOpc(Op.getOpcode()) ? Hi : Lo; - SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, ZeroOrOne, ISD::SETEQ); + SDValue Hi0orLo0 = DAG.getSetCC(SL, SetCCVT, HiOrLo, Zero, ISD::SETEQ); SDValue OprLo = DAG.getNode(ISDOpc, SL, MVT::i32, Lo); SDValue OprHi = DAG.getNode(ISDOpc, SL, MVT::i32, Hi); @@ -2233,7 +2232,7 @@ // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32, // which we probably don't want. SDValue LoOrHi = isCtlzOpc(Op.getOpcode()) ? Lo : Hi; - SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, ZeroOrOne, ISD::SETEQ); + SDValue Lo0OrHi0 = DAG.getSetCC(SL, SetCCVT, LoOrHi, Zero, ISD::SETEQ); SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0OrHi0, Hi0orLo0); // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction Index: llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll +++ llvm/trunk/test/CodeGen/AMDGPU/cttz_zero_undef.ll @@ -157,6 +157,7 @@ ; FUNC-LABEL: {{^}}v_cttz_zero_undef_i32_with_select: ; SI: v_ffbl_b32_e32 v{{[0-9]+}}, v{{[0-9]+}} +; SI: v_cmp_ne_u32_e32 vcc, 0 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] define amdgpu_kernel void @v_cttz_zero_undef_i32_with_select(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind { %val = load i32, i32 addrspace(1)* %arrayidx, align 1 @@ -178,6 +179,8 @@ ; SI: v_or_b32_e32 [[VAL2:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}} ; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL1]] ; SI-DAG: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL2]] +; SI: v_cmp_eq_u32_e32 vcc, 0 +; SI: v_cmp_ne_u64_e32 vcc, 0 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]] define amdgpu_kernel void @v_cttz_zero_undef_i64_with_select(i64 addrspace(1)* noalias %out, i64 addrspace(1)* nocapture readonly %arrayidx) nounwind { %val = load i64, i64 addrspace(1)* %arrayidx, align 1