Index: llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -546,6 +546,62 @@
   }
 }
 
+// Try to recognize the function below as a popcount intrinsic. This comes
+// from Hacker's Delight, Chapter 5.
+//
+// int popcount(unsigned int i) {
+//   i = i - ((i >> 1) & 0x55555555);
+//   i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+//   i = ((i + (i >> 4)) & 0x0F0F0F0F);
+//   return (i * 0x01010101) >> 24;
+// }
+static Instruction *tryToRecognizePopCount(BinaryOperator &I) {
+  if (I.getOpcode() != Instruction::LShr)
+    return nullptr;
+
+  if (!I.getType()->isIntegerTy(32) && !I.getType()->isIntegerTy(64))
+    return nullptr;
+
+  bool IsType32 = I.getType()->isIntegerTy(32);
+  uint16_t FinalShiftConst = IsType32 ? 24 : 56;
+  uint64_t MulConst = IsType32 ? 0x01010101 : 0x0101010101010101;
+  Value *Op0 = I.getOperand(0);
+  Value *Op1 = I.getOperand(1);
+  Value *MulOp0;
+  // Matching "(i * 0x01010101) >> 24".
+  if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(MulConst)))) &&
+      match(Op1, m_SpecificInt(FinalShiftConst))) {
+    uint64_t Shift4Const = IsType32 ? 0x0f0f0f0f : 0x0f0f0f0f0f0f0f0fULL;
+    Value *Shift4Op0;
+    // Matching "((i + (i >> 4)) & 0x0F0F0F0F)".
+    if (match(MulOp0,
+              m_And(m_c_Add(m_LShr(m_Value(Shift4Op0), m_SpecificInt(4)),
+                            m_Deferred(Shift4Op0)),
+                    m_SpecificInt(Shift4Const)))) {
+      uint64_t Shift2Const = IsType32 ? 0x33333333 : 0x3333333333333333ULL;
+      Value *And2Op0;
+      // Matching "(i & 0x33333333) + ((i >> 2) & 0x33333333)".
+      if (match(Shift4Op0,
+                m_c_Add(m_And(m_Value(And2Op0), m_SpecificInt(Shift2Const)),
+                        m_And(m_LShr(m_Deferred(And2Op0), m_SpecificInt(2)),
+                              m_SpecificInt(Shift2Const))))) {
+        uint64_t Shift1Const = IsType32 ? 0x55555555 : 0x5555555555555555ULL;
+        Value *Root, *SubOp1;
+        // Matching "i - ((i >> 1) & 0x55555555)".
+        if (match(And2Op0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
+            match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
+                                m_SpecificInt(Shift1Const)))) {
+          LLVM_DEBUG(dbgs() << "Recognize popcount intrinsic\n");
+          Function *Func = Intrinsic::getDeclaration(
+              I.getModule(), Intrinsic::ctpop, I.getType());
+          return IntrinsicInst::Create(Func, Root);
+        }
+      }
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
                                                BinaryOperator &I) {
   bool isLeftShift = I.getOpcode() == Instruction::Shl;
@@ -912,6 +968,9 @@
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *X = tryToRecognizePopCount(I))
+    return X;
+
   if (Instruction *R = commonShiftTransforms(I))
     return R;
 
Index: llvm/test/Transforms/InstCombine/popcount.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/popcount.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;int popcount32(unsigned i) {
+;  i = i - ((i >> 1) & 0x55555555);
+;  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
+;  i = ((i + (i >> 4)) & 0x0F0F0F0F);
+;  return (i * 0x01010101) >> 24;
+;}
+define signext i32 @popcount32(i32 zeroext %0) {
+; CHECK-LABEL: @popcount32(
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ctpop.i32(i32 [[TMP0:%.*]]), !range !0
+; CHECK-NEXT:    ret i32 [[TMP2]]
+;
+  %2 = lshr i32 %0, 1
+  %3 = and i32 %2, 1431655765
+  %4 = sub i32 %0, %3
+  %5 = and i32 %4, 858993459
+  %6 = lshr i32 %4, 2
+  %7 = and i32 %6, 858993459
+  %8 = add nuw nsw i32 %7, %5
+  %9 = lshr i32 %8, 4
+  %10 = add nuw nsw i32 %9, %8
+  %11 = and i32 %10, 252645135
+  %12 = mul i32 %11, 16843009
+  %13 = lshr i32 %12, 24
+  ret i32 %13
+}
+
+;int popcount64(unsigned long long i) {
+;  i = i - ((i >> 1) & 0x5555555555555555);
+;  i = (i & 0x3333333333333333) + ((i >> 2) & 0x3333333333333333);
+;  i = ((i + (i >> 4)) & 0x0F0F0F0F0F0F0F0F);
+;  return (i * 0x0101010101010101) >> 56;
+;}
+define signext i32 @popcount64(i64 %0) {
+; CHECK-LABEL: @popcount64(
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP0:%.*]]), !range !1
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = lshr i64 %0, 1
+  %3 = and i64 %2, 6148914691236517205
+  %4 = sub i64 %0, %3
+  %5 = and i64 %4, 3689348814741910323
+  %6 = lshr i64 %4, 2
+  %7 = and i64 %6, 3689348814741910323
+  %8 = add nuw nsw i64 %7, %5
+  %9 = lshr i64 %8, 4
+  %10 = add nuw nsw i64 %9, %8
+  %11 = and i64 %10, 1085102592571150095
+  %12 = mul i64 %11, 72340172838076673
+  %13 = lshr i64 %12, 56
+  %14 = trunc i64 %13 to i32
+  ret i32 %14
+}
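
Sanity check (not part of the patch): the fold is only sound if the matched Hacker's Delight sequence really computes the population count for both widths handled above. The standalone program below is a minimal sketch that cross-checks the 32-bit and 64-bit sequences against the compiler builtins over deterministic pseudo-random inputs; it assumes a GCC/Clang host for __builtin_popcount/__builtin_popcountll, and the helper names (popcount32Ref, popcount64Ref) are illustrative only and do not appear in the patch.

// Cross-check of the matched popcount sequences against compiler builtins.
#include <cassert>
#include <cstdint>
#include <cstdio>

// 32-bit sequence matched by tryToRecognizePopCount.
static uint32_t popcount32Ref(uint32_t i) {
  i = i - ((i >> 1) & 0x55555555);
  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
  i = (i + (i >> 4)) & 0x0F0F0F0F;
  return (i * 0x01010101) >> 24;
}

// 64-bit sequence matched by tryToRecognizePopCount.
static uint64_t popcount64Ref(uint64_t i) {
  i = i - ((i >> 1) & 0x5555555555555555ULL);
  i = (i & 0x3333333333333333ULL) + ((i >> 2) & 0x3333333333333333ULL);
  i = (i + (i >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
  return (i * 0x0101010101010101ULL) >> 56;
}

int main() {
  // Deterministic xorshift generator so the check is reproducible.
  uint64_t State = 0x0123456789ABCDEFULL;
  for (int N = 0; N < 1000000; ++N) {
    State ^= State << 13;
    State ^= State >> 7;
    State ^= State << 17;
    uint32_t V32 = static_cast<uint32_t>(State);
    assert(popcount32Ref(V32) ==
           static_cast<uint32_t>(__builtin_popcount(V32)));
    assert(popcount64Ref(State) ==
           static_cast<uint64_t>(__builtin_popcountll(State)));
  }
  std::printf("popcount reference sequences match the builtins\n");
  return 0;
}

The IR bodies in popcount.ll follow these same sequences (with the constants in decimal), so running them through opt -instcombine as in the RUN line should collapse each function to the single ctpop call shown in the CHECK lines.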