Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -709,17 +709,18 @@
       match(Count, m_Trunc(m_Value(V))))
     Count = V;
 
+  // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
+  // input to the cttz/ctlz is used as LHS for the compare instruction.
+  if (!match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) &&
+      !match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS))))
+    return nullptr;
+
+  IntrinsicInst *II = cast<IntrinsicInst>(Count);
+
   // Check if the value propagated on zero is a constant number equal to the
   // sizeof in bits of 'Count'.
   unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
-  if (!match(ValueOnZero, m_SpecificInt(SizeOfInBits)))
-    return nullptr;
-
-  // Check that 'Count' is a call to intrinsic cttz/ctlz. Also check that the
-  // input to the cttz/ctlz is used as LHS for the compare instruction.
-  if (match(Count, m_Intrinsic<Intrinsic::cttz>(m_Specific(CmpLHS))) ||
-      match(Count, m_Intrinsic<Intrinsic::ctlz>(m_Specific(CmpLHS)))) {
-    IntrinsicInst *II = cast<IntrinsicInst>(Count);
+  if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) {
     // Explicitly clear the 'undef_on_zero' flag.
     IntrinsicInst *NewI = cast<IntrinsicInst>(II->clone());
     NewI->setArgOperand(1, ConstantInt::getFalse(NewI->getContext()));
@@ -727,6 +728,12 @@
     return Builder.CreateZExtOrTrunc(NewI, ValueOnZero->getType());
   }
 
+  // If the ValueOnZero is not the bitwidth, we can at least make use of the
+  // fact that the cttz/ctlz result will not be used if the input is zero, so
+  // it's okay to relax it to undef for that case.
+  if (II->hasOneUse() && !match(II->getArgOperand(1), m_One()))
+    II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
+
   return nullptr;
 }
 
Index: llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
+++ llvm/trunk/test/Transforms/InstCombine/select-cmp-cttz-ctlz.ll
@@ -345,7 +345,7 @@
 
 define i32 @test_ctlz_not_bw(i32 %x) {
 ; CHECK-LABEL: @test_ctlz_not_bw(
-; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true), !range !1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
 ; CHECK-NEXT: ret i32 [[RES]]
@@ -373,7 +373,7 @@
 
 define i32 @test_cttz_not_bw(i32 %x) {
 ; CHECK-LABEL: @test_cttz_not_bw(
-; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 false), !range !1
+; CHECK-NEXT: [[CT:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true), !range !1
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 0
 ; CHECK-NEXT: [[RES:%.*]] = select i1 [[CMP]], i32 123, i32 [[CT]]
 ; CHECK-NEXT: ret i32 [[RES]]
@@ -412,7 +412,7 @@
 
 define <2 x i32> @test_ctlz_not_bw_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test_ctlz_not_bw_vec(
-; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[X:%.*]], i1 true)
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
 ; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> [[CT]]
 ; CHECK-NEXT: ret <2 x i32> [[RES]]
@@ -436,7 +436,7 @@
 
 define <2 x i32> @test_cttz_not_bw_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @test_cttz_not_bw_vec(
-; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 false)
+; CHECK-NEXT: [[CT:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[X:%.*]], i1 true)
 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
 ; CHECK-NEXT: [[RES:%.*]] = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> [[CT]]
 ; CHECK-NEXT: ret <2 x i32> [[RES]]