diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -424,6 +424,7 @@ "Expected cttz or ctlz intrinsic"); bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; Value *Op0 = II.getArgOperand(0); + Value *Op1 = II.getArgOperand(1); Value *X; // ctlz(bitreverse(x)) -> cttz(x) // cttz(bitreverse(x)) -> ctlz(x) @@ -438,6 +439,15 @@ if (match(Op0, m_Neg(m_Value(X)))) return IC.replaceOperand(II, 0, X); + // Zext doesn't change the number of trailing zeros, so narrow: + // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsUndef' parameter is 'true'. + if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) { + auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X, + IC.Builder.getTrue()); + auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType()); + return IC.replaceInstUsesWith(II, ZextCttz); + } + // cttz(abs(x)) -> cttz(x) // cttz(nabs(x)) -> cttz(x) Value *Y; diff --git a/llvm/test/Transforms/InstCombine/cttz.ll b/llvm/test/Transforms/InstCombine/cttz.ll --- a/llvm/test/Transforms/InstCombine/cttz.ll +++ b/llvm/test/Transforms/InstCombine/cttz.ll @@ -7,9 +7,9 @@ define i32 @cttz_zext_zero_undef(i16 %x) { ; CHECK-LABEL: @cttz_zext_zero_undef( -; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 true), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: ret i32 [[TZ]] +; CHECK-NEXT: [[TMP1:%.*]] = call i16 @llvm.cttz.i16(i16 [[X:%.*]], i1 true), !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; %z = zext i16 %x to i32 %tz = call i32 @llvm.cttz.i32(i32 %z, i1 true) @@ -19,7 +19,7 @@ define i32 @cttz_zext_zero_def(i16 %x) { ; CHECK-LABEL: @cttz_zext_zero_def( ; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 false), !range [[RNG1:![0-9]+]] ; CHECK-NEXT: ret i32 [[TZ]] ; %z = zext i16 %x to i32 @@ -31,7 +31,7 @@ ; CHECK-LABEL: @cttz_zext_zero_undef_extra_use( ; CHECK-NEXT: [[Z:%.*]] = zext i16 [[X:%.*]] to i32 ; CHECK-NEXT: call void @use(i32 [[Z]]) -; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 true), !range [[RNG0]] +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[Z]], i1 true), !range [[RNG1]] ; CHECK-NEXT: ret i32 [[TZ]] ; %z = zext i16 %x to i32 @@ -42,9 +42,9 @@ define <2 x i64> @cttz_zext_zero_undef_vec(<2 x i32> %x) { ; CHECK-LABEL: @cttz_zext_zero_undef_vec( -; CHECK-NEXT: [[Z:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[TZ:%.*]] = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> [[Z]], i1 true) -; CHECK-NEXT: ret <2 x i64> [[TZ]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 true) +; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP2]] ; %z = zext <2 x i32> %x to <2 x i64> %tz = tail call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %z, i1 true) @@ -65,7 +65,7 @@ define i32 @cttz_sext_zero_undef(i16 %x) { ; CHECK-LABEL: @cttz_sext_zero_undef( ; CHECK-NEXT: [[S:%.*]] = sext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG0]] +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG1]] ; CHECK-NEXT: ret i32 [[TZ]] ; %s = sext i16 %x to i32 @@ -76,7 +76,7 @@ define i32 @cttz_sext_zero_def(i16 %x) { ; CHECK-LABEL: @cttz_sext_zero_def( ; CHECK-NEXT: [[S:%.*]] = zext i16 [[X:%.*]] to i32 -; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 false), !range [[RNG1]] ; CHECK-NEXT: ret i32 [[TZ]] ; %s = zext i16 %x to i32 @@ -84,11 +84,11 @@ ret i32 %tz } -define i32 @cttz_zext_sero_undef_extra_use(i16 %x) { -; CHECK-LABEL: @cttz_zext_sero_undef_extra_use( +define i32 @cttz_sext_zero_undef_extra_use(i16 %x) { +; CHECK-LABEL: @cttz_sext_zero_undef_extra_use( ; CHECK-NEXT: [[S:%.*]] = sext i16 [[X:%.*]] to i32 ; CHECK-NEXT: call void @use(i32 [[S]]) -; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG0]] +; CHECK-NEXT: [[TZ:%.*]] = call i32 @llvm.cttz.i32(i32 [[S]], i1 true), !range [[RNG1]] ; CHECK-NEXT: ret i32 [[TZ]] ; %s = sext i16 %x to i32