diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2054,7 +2054,8 @@ // First ZEXT the input. auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg); LLT CurTy = MRI.getType(SrcReg); - if (MI.getOpcode() == TargetOpcode::G_CTTZ) { + unsigned NewOpc = MI.getOpcode(); + if (NewOpc == TargetOpcode::G_CTTZ) { // The count is the same in the larger type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. @@ -2062,10 +2063,12 @@ APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits()); MIBSrc = MIRBuilder.buildOr( WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit)); + // Now we know the operand is non-zero, use the more relaxed opcode. + NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF; } // Perform the operation at the larger size. - auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc}); + auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc}); // This is already the correct result for CTPOP and CTTZs if (MI.getOpcode() == TargetOpcode::G_CTLZ || MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-cttz.mir @@ -96,11 +96,7 @@ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: $vgpr0 = COPY [[AND1]](s32) @@ -176,18 +172,12 @@ ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65536 ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C2]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C3]] - ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C4]], [[CTTZ_ZERO_UNDEF]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[C2]] ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR1]](s32) - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR1]](s32), [[C3]] - ; CHECK: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C4]], [[CTTZ_ZERO_UNDEF1]] - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[SELECT1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF1]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C1]] ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY4]](s32) @@ -216,11 +206,7 @@ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s32) - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s32), [[C2]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]] - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SELECT]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[CTTZ_ZERO_UNDEF]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]] ; CHECK: $vgpr0 = COPY [[AND1]](s32) @@ -246,14 +232,10 @@ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934592 ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[C1]] ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[OR]](s64) - ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[OR]](s64), [[C2]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C3]], [[CTTZ_ZERO_UNDEF]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[SELECT]](s32) - ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[CTTZ_ZERO_UNDEF]](s32) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4294967295 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[ZEXT]](s64) - ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C4]](s64) + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[C2]](s64) ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[COPY2]], [[COPY3]] ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY [[AND1]](s64) ; CHECK: $vgpr0_vgpr1 = COPY [[COPY4]](s64)