Index: lib/Transforms/InstCombine/InstCombineCalls.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCalls.cpp +++ lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1237,6 +1237,20 @@ return nullptr; } +static void makeCttzCtlzZeroUndef(InstCombiner &IC, IntrinsicInst *II) { + assert((II->getIntrinsicID() == Intrinsic::cttz || + II->getIntrinsicID() == Intrinsic::ctlz) && + "Expected cttz or ctlz intrinsic"); + bool IsZeroUndef = false; + Value *Op1 = II->getArgOperand(1); + if (auto *Op1C = dyn_cast<ConstantInt>(Op1)) + IsZeroUndef = Op1C->getZExtValue() != 0; + if (!IsZeroUndef) { + IC.Worklist.Add(II); + II->setOperand(1, ConstantInt::getAllOnesValue(Op1->getType())); + } +} + /// CallInst simplification. This mostly only handles folding of intrinsic /// instructions. For normal calls, it allows visitCallSite to do the heavy /// lifting. @@ -1400,40 +1414,43 @@ case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. - IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + Value *Op0 = II->getArgOperand(0); + IntegerType *IT = dyn_cast<IntegerType>(Op0->getType()); // FIXME: Try to simplify vectors of integers. if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II); + computeKnownBits(Op0, KnownZero, KnownOne, 0, II); unsigned TrailingZeros = KnownOne.countTrailingZeros(); APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros)); if ((Mask & KnownZero) == Mask) return replaceInstUsesWith(CI, ConstantInt::get(IT, APInt(BitWidth, TrailingZeros))); - + if (KnownOne != 0 || isKnownNonZero(Op0, DL)) + makeCttzCtlzZeroUndef(*this, II); } break; case Intrinsic::ctlz: { // If all bits above the first known one are known zero, // this value is constant. 
- IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType()); + Value *Op0 = II->getArgOperand(0); + IntegerType *IT = dyn_cast<IntegerType>(Op0->getType()); // FIXME: Try to simplify vectors of integers. if (!IT) break; uint32_t BitWidth = IT->getBitWidth(); APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); - computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II); + computeKnownBits(Op0, KnownZero, KnownOne, 0, II); unsigned LeadingZeros = KnownOne.countLeadingZeros(); APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros)); if ((Mask & KnownZero) == Mask) return replaceInstUsesWith(CI, ConstantInt::get(IT, APInt(BitWidth, LeadingZeros))); - + if (KnownOne != 0 || isKnownNonZero(Op0, DL)) + makeCttzCtlzZeroUndef(*this, II); } break; - case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::umul_with_overflow: Index: test/Transforms/InstCombine/intrinsics.ll =================================================================== --- test/Transforms/InstCombine/intrinsics.ll +++ test/Transforms/InstCombine/intrinsics.ll @@ -384,6 +384,16 @@ ; CHECK-NEXT: ret i32 undef } +define i32 @ctlz_make_undef(i32 %a) { + %or = or i32 %a, 8 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 false) + ret i32 %ctlz +; CHECK-LABEL: @ctlz_make_undef( +; CHECK-NEXT: %or = or i32 %a, 8 +; CHECK-NEXT: %ctlz = tail call i32 @llvm.ctlz.i32(i32 %or, i1 true) +; CHECK-NEXT: ret i32 %ctlz +} + define i32 @cttz_undef(i32 %Value) nounwind { %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true) ret i32 %cttz @@ -392,6 +402,18 @@ ; CHECK-NEXT: ret i32 undef } +define i32 @cttz_make_undef(i32 %a) { +entry: + %or = or i32 %a, 8 + %cttz = tail call i32 @llvm.cttz.i32(i32 %or, i1 false) + ret i32 %cttz +; CHECK-LABEL: @cttz_make_undef( +; CHECK-NEXT: entry: +; CHECK-NEXT: %or = or i32 %a, 8 +; CHECK-NEXT: %cttz = tail call i32 @llvm.cttz.i32(i32 %or, i1 true) +; CHECK-NEXT: ret i32 %cttz +} + define i32 @ctlz_select(i32 %Value) nounwind { %tobool = icmp ne i32 
%Value, 0 %ctlz = call i32 @llvm.ctlz.i32(i32 %Value, i1 true)