Index: llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -424,6 +424,7 @@ "Expected cttz or ctlz intrinsic"); bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; Value *Op0 = II.getArgOperand(0); + Value *Op1 = II.getArgOperand(1); Value *X; // ctlz(bitreverse(x)) -> cttz(x) // cttz(bitreverse(x)) -> ctlz(x) @@ -438,6 +439,15 @@ if (match(Op0, m_Neg(m_Value(X)))) return IC.replaceOperand(II, 0, X); + // Zext preserves the trailing-zero count of a non-zero value, so narrow: + // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsUndef' parameter is 'true'. + if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) { + auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X, + IC.Builder.getTrue()); + auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType()); + return IC.replaceInstUsesWith(II, ZextCttz); + } + // cttz(abs(x)) -> cttz(x) // cttz(nabs(x)) -> cttz(x) Value *Y; Index: llvm/test/Transforms/InstCombine/cttz.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/cttz.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -instcombine | FileCheck %s + +declare i32 @llvm.cttz.i32(i32, i1) +declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) +declare void @use(i32) + +define i32 @cttz_zext_zero_undef(i16 %x) { +; CHECK-LABEL: @cttz_zext_zero_undef( +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %z = zext i16 %x to i32 + %tz = call i32 @llvm.cttz.i32(i32 %z, i1 true) + ret i32 %tz +} + +define i32 @cttz_zext_zero_def(i16 %x) { +; CHECK-LABEL: @cttz_zext_zero_def( +; CHECK-NEXT: [[Z:%.*]] = zext i16 
[[X:%.*]] to i32 +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 false), !range [[RNG1:![0-9]+]] +; CHECK-NEXT: ret i32 [[TZ]] +; + %z = zext i16 %x to i32 + %tz = call i32 @llvm.cttz.i32(i32 %z, i1 false) + ret i32 %tz +} + +define i32 @cttz_zext_zero_undef_extra_use(i16 %x) { +; CHECK-LABEL: @cttz_zext_zero_undef_extra_use( +; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32 +; CHECK-NEXT: call void @use(i32 [[Z]]) +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 true), !range [[RNG1]] +; CHECK-NEXT: ret i32 [[TZ]] +; + %z = zext i16 %x to i32 + call void @use(i32 %z) + %tz = call i32 @llvm.cttz.i32(i32 %z, i1 true) + ret i32 %tz +} + +define <2 x i64> @cttz_zext_zero_undef_vec(<2 x i32> %x) { +; CHECK-LABEL: @cttz_zext_zero_undef_vec( +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] +; + %z = zext <2 x i32> %x to <2 x i64> + %tz = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %z, i1 true) + ret <2 x i64> %tz +} + +define <2 x i64> @cttz_zext_zero_def_vec(<2 x i32> %x) { +; CHECK-LABEL: @cttz_zext_zero_def_vec( +; CHECK-NEXT: [[Z:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> +; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[Z]], i1 false) +; CHECK-NEXT: ret <2 x i64> [[TZ]] +; + %z = zext <2 x i32> %x to <2 x i64> + %tz = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %z, i1 false) + ret <2 x i64> %tz +}