Diff 43461

lib/Transforms/InstCombine/InstCombineAddSub.cpp

Show First 20 Lines • Show All 1,041 Lines • ▼ Show 20 Lines	if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
if (C1->countTrailingZeros() == 0)		if (C1->countTrailingZeros() == 0)
if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && C1 == (C2 + 1)) {		if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && C1 == (C2 + 1)) {
Value NewOr = Builder->CreateOr(Z, ~(C2));		Value NewOr = Builder->CreateOr(Z, ~(C2));
return Builder->CreateSub(RHS, NewOr, "sub");		return Builder->CreateSub(RHS, NewOr, "sub");
}		}
return nullptr;		return nullptr;
}		}

		// Match BI against the tail of the classic bit-parallel population-count
		// idiom computed over a BW-bit quantity:
		//   x0 := (V & 0x55..5) + ((V >> 1) & 0x55..5)
		//   x1 := (x0 & 0x33..3) + ((x0 >> 2) & 0x33..3)
		//   ...
		//   xn := (xn-1 & 0x00..0FF..F) + ((xn-1 >> BW/2) & 0x00..0FF..F)
		// where BI is xn. The chain is walked from BI inwards, halving the shift
		// amount at every step, and every and-mask is compared against the mask
		// expected at that step (see getMask in the body).
		//
		// Returns the innermost value V when all log2(BW) steps match, and nullptr
		// otherwise. Only the shape of the expression is examined: nothing here
		// inspects the bits of V at or above position BW, so when BW is smaller
		// than the width of V's type, BI == ctpop(V) holds only if those bits are
		// zero. BW must be a power of two and at least 2; anything else yields
		// nullptr.
		static Value *matchCtpopW(BinaryOperator *BI, unsigned BW) {
		  // Review comment (hfinkel, marked Done): I think it would be better if the
		  // comment above matchCtpopW explained what matchCtpopW does, and a comment
		  // above optimizeToCtpop explained what optimizeToCtpop does. The
		  // "If BW is less than the bit-width of the type of V, then BI == ctpop(V)
		  // if the bits of V beyond BW are zero" part is a bit confusing here
		  // because it is describing behavior implemented in a different function.

		  // With fewer than two bits there is no step to match, and the masks are
		  // only defined for power-of-two widths.
		  if (BW < 2 || !isPowerOf2_32(BW))
		    return nullptr;

		  // Match one step of the idiom rooted at Sum:
		  //   (X & M) + ((X >> S) & M)   when ShiftAlone is false
		  //   (X & M) + (X >> S)         when ShiftAlone is true
		  // in either operand order of the add. Returns X, or nullptr on failure.
		  auto matchStep = [](Value *Sum, unsigned S, const APInt &M,
		                      bool ShiftAlone) -> Value * {
		    Value *Op0 = nullptr, *Op1 = nullptr;
		    if (!match(Sum, m_Add(m_Value(Op0), m_Value(Op1))))
		      return nullptr;

		    // m_APInt only binds the constant it finds; it does not compare it
		    // with anything. The comparison against the expected mask therefore
		    // has to be done explicitly. M is BW bits wide while the constant has
		    // the width of the IR type, so M is zero-extended before comparing.
		    auto isExpectedMask = [&M](const APInt &C) {
		      unsigned W = C.getBitWidth();
		      return W >= M.getBitWidth() && C == M.zextOrTrunc(W);
		    };

		    auto matchAndShift = [&](Value *Masked, Value *Shifted) -> Value * {
		      Value *X = nullptr;
		      const APInt *MaskC = nullptr;
		      if (!match(Masked, m_And(m_Value(X), m_APInt(MaskC))) ||
		          !isExpectedMask(*MaskC))
		        return nullptr;

		      auto ShiftedX = m_LShr(m_Specific(X), m_SpecificInt(S));
		      if (ShiftAlone)
		        return match(Shifted, ShiftedX) ? X : nullptr;

		      const APInt *ShiftedMaskC = nullptr;
		      if (!match(Shifted, m_And(ShiftedX, m_APInt(ShiftedMaskC))) ||
		          !isExpectedMask(*ShiftedMaskC))
		        return nullptr;
		      return X;
		    };

		    if (Value *X = matchAndShift(Op0, Op1))
		      return X;
		    return matchAndShift(Op1, Op0);
		  };

		  // Generate the bitmask for the & operation. BW is the bit-width of the
		  // entire mask; S is the group size. The masks are:
		  //   S=2: 0b01010101..01010101  0x55..55  1 bit  every 2 bits
		  //   S=4: 0b00110011..00110011  0x33..33  2 bits every 4 bits
		  //   S=8: 0b00000111..00000111  0x07..07  3 bits every 8 bits
		  //   ...                                  log2(S) bits every S bits
		  // Normally the masks would be 01010101, 00110011, 00001111, i.e. the
		  // number of contiguous 1 bits in each group would be twice the number
		  // in the previous mask, but by the time this code runs, the "demanded"
		  // bits have been optimized to only require one more 1 bit in each
		  // subsequent mask. This function generates the post-optimized masks.
		  auto getMask = [](unsigned S, unsigned BW) -> APInt {
		    assert(isPowerOf2_32(S));
		    APInt M(BW, S - 1);
		    APInt T(BW, 0);
		    // Replicate the low group across the whole width; M becomes zero once
		    // it has been shifted out.
		    while (M != 0) {
		      T |= M;
		      M <<= S;
		    }
		    return T;
		  };

		  Value *V = BI;
		  // Only the outermost step (the first one matched) is expected to have
		  // the bare shift without a mask.
		  bool SA = true;
		  unsigned N = BW;
		  while (N > 1) {
		    unsigned S = N / 2;
		    V = matchStep(V, S, getMask(N, BW), SA);
		    if (!V)
		      return nullptr;
		    N = S;
		    SA = false;
		  }

		  return V;
		}

		// If the add BI is the last step of a hand-written population count (as
		// recognized by matchCtpopW), emit a call to llvm.ctpop on the value being
		// counted and return it; otherwise return nullptr. The width of the count
		// is taken to be twice the shift amount feeding BI. When that width is
		// smaller than the width of BI's type, the transformation is only done if
		// the remaining high bits of the counted value are known to be zero.
		static Value *optimizeToCtpop(BinaryOperator *BI,
		                              InstCombiner::BuilderTy *Builder) {
		  IntegerType *Ty = dyn_cast<IntegerType>(BI->getType());
		  if (!Ty)
		    return nullptr;

		  // Take the first shift amount feeding the add, and assume this is the
		  // last shift in the popcnt computation.
		  Value *Op0 = nullptr, *Op1 = nullptr;
		  if (!match(BI, m_Add(m_Value(Op0), m_Value(Op1))))
		    return nullptr;

		  // Shift by half-width.
		  // NOTE(review): the first two patterns expect the shift as the second
		  // operand of the 'and', whereas matchCtpopW in this file looks for it as
		  // the first operand -- confirm which operand order was intended.
		  uint64_t SH = 0;
		  if (!match(Op0, m_And(m_Value(), m_LShr(m_Value(), m_ConstantInt(SH)))) &&
		      !match(Op1, m_And(m_Value(), m_LShr(m_Value(), m_ConstantInt(SH)))) &&
		      !match(Op0, m_LShr(m_Value(), m_ConstantInt(SH))) &&
		      !match(Op1, m_LShr(m_Value(), m_ConstantInt(SH))))
		    return nullptr;

		  // SH is an arbitrary 64-bit constant. Reject it unless twice its value
		  // is a power of two that fits in the type; this also guarantees that
		  // narrowing 2*SH to unsigned below cannot wrap.
		  unsigned TW = Ty->getBitWidth();
		  if (SH < 4 || !isPowerOf2_64(SH) || SH > TW / 2)
		    return nullptr;
		  unsigned BW = 2 * SH;

		  Value *V = matchCtpopW(BI, BW);
		  if (!V)
		    return nullptr;

		  Module *M = Builder->GetInsertBlock()->getParent()->getParent();
		  if (BW < TW) {
		    // Review comment (hfinkel, Not Done): Instead of checking for known zero
		    // bits, why not transform this into a ctpop(v & mask)? If the upper bits
		    // are known to be zero, then the mask will go away in the next
		    // instcombine iteration regardless.
		    // Reply (kparzysz, author): Because then the pattern is not really
		    // computing the population count. The instructions in the pattern
		    // operate on the whole value, including the high bits, and these bits
		    // contribute to the final result.
		    //
		    // BW is the bit width of the expression whose population count is
		    // being calculated. TW is the bit width of the type associated with
		    // that expression. Usually they are the same, but for ctpop8 the
		    // type may be "unsigned", i.e. 32-bit, while the ctpop8 would only
		    // consider the low 8 bits. In that case BW=8 and TW=32.
		    APInt K0(TW, 0), K1(TW, 0);
		    computeKnownBits(V, K0, K1, M->getDataLayout());
		    // Every bit in [BW, TW) must be known zero.
		    APInt Need0 = APInt::getBitsSet(TW, BW, TW);
		    if ((K0 & Need0) != Need0)
		      return nullptr;
		  }

		  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctpop, {V->getType()});
		  CallInst *CI = Builder->CreateCall(Func, {V});
		  CI->setDebugLoc(BI->getDebugLoc());
		  return CI;
		}

Instruction *InstCombiner::visitAdd(BinaryOperator &I) {		Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);		bool Changed = SimplifyAssociativeOrCommutative(I);
Value LHS = I.getOperand(0), RHS = I.getOperand(1);		Value LHS = I.getOperand(0), RHS = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);		return ReplaceInstUsesWith(I, V);

		majnemerUnsubmitted Done Reply Inline Actions Seeing as how this is a rather rare occurrence for adds, can you sort this to the bottom? The `Simplify...` naming convention implies that no new instruction needs to be created which is not the case for this transform. I'd call it `optimizeCtpopIdiom` or something similar. majnemer: Seeing as how this is a rather rare occurrence for adds, can you sort this to the bottom? The…
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),		if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), DL, TLI, DT, AC))		I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);		return ReplaceInstUsesWith(I, V);

// (A*B)+(A*C) -> A*(B+C) etc		// (A*B)+(A*C) -> A*(B+C) etc
if (Value *V = SimplifyUsingDistributiveLaws(I))		if (Value *V = SimplifyUsingDistributiveLaws(I))
return ReplaceInstUsesWith(I, V);		return ReplaceInstUsesWith(I, V);

▲ Show 20 Lines • Show All 225 Lines • ▼ Show 20 Lines	if (match(LHS, m_Or(m_Value(A), m_Value(B))) &&
match(RHS, m_And(m_Specific(B), m_Specific(A))))) {		match(RHS, m_And(m_Specific(B), m_Specific(A))))) {
auto *New = BinaryOperator::CreateAdd(A, B);		auto *New = BinaryOperator::CreateAdd(A, B);
New->setHasNoSignedWrap(I.hasNoSignedWrap());		New->setHasNoSignedWrap(I.hasNoSignedWrap());
New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());		New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
return New;		return New;
}		}
}		}

		if (Value *V = optimizeToCtpop(&I, Builder))
		return ReplaceInstUsesWith(I, V);

// TODO(jingyue): Consider WillNotOverflowSignedAdd and		// TODO(jingyue): Consider WillNotOverflowSignedAdd and
// WillNotOverflowUnsignedAdd to reduce the number of invocations of		// WillNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.		// computeKnownBits.
if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {		if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS, I)) {
Changed = true;		Changed = true;
I.setHasNoSignedWrap(true);		I.setHasNoSignedWrap(true);
}		}
if (!I.hasNoUnsignedWrap() &&		if (!I.hasNoUnsignedWrap() &&
▲ Show 20 Lines • Show All 178 Lines • ▼ Show 20 Lines	Value InstCombiner::OptimizePointerDifference(Value LHS, Value *RHS,

// If we have p - gep(p, ...) then we have to negate the result.		// If we have p - gep(p, ...) then we have to negate the result.
if (Swapped)		if (Swapped)
Result = Builder->CreateNeg(Result, "diff.neg");		Result = Builder->CreateNeg(Result, "diff.neg");

return Builder->CreateIntCast(Result, Ty, true);		return Builder->CreateIntCast(Result, Ty, true);
}		}

		// Recognize a hand-written count-leading-zeros and replace it with a call
		// to llvm.ctlz. Returns the replacement value, or nullptr if BI does not
		// match.
		static Value *optimizeToCtlz(BinaryOperator *BI,
		                             InstCombiner::BuilderTy *Builder) {
		  // Let bw = bitwidth(n),
		  // convert
		  //   n = n | (n>>1)
		  //   n = n | (n>>2)
		  //   n = n | (n>>4)
		  //   ...
		  //   n = n | (n>>bw/2)
		  //   bw - ctpop(n)
		  // to
		  //   ctlz(n).
		  // This code expects that the ctpop intrinsic has already been generated.

		  uint64_t BW = 0;
		  if (!match(BI, m_Sub(m_ConstantInt(BW), m_Intrinsic<Intrinsic::ctpop>())))
		    return nullptr;

		  // BW comes straight from an arbitrary constant in the IR. The shift
		  // sequence bw/2, ..., 4, 2, 1 checked below only propagates the top set
		  // bit into every lower position when bw is a power of two, and BW is
		  // later narrowed to unsigned and used as an integer type width, so it
		  // must also be a legal width.
		  if (!isPowerOf2_64(BW) || BW > IntegerType::MAX_INT_BITS)
		    return nullptr;

		  // Get the argument of the ctpop.
		  Value *V = cast<User>(BI->getOperand(1))->getOperand(0);

		  // The argument to ctpop can be zero-extended in some cases. It is safe
		  // to ignore the zext.
		  if (auto *Z = dyn_cast<ZExtInst>(V))
		    V = Z->getOperand(0);

		  IntegerType *Ty = cast<IntegerType>(V->getType());
		  if (BW < Ty->getBitWidth())
		    return nullptr;

		  // Match X | (X >> S) in either operand order and return X.
		  auto matchOrShift = [](Value *V, unsigned S) -> Value * {
		    Value *Op0 = nullptr, *Op1 = nullptr;
		    if (!match(V, m_Or(m_Value(Op0), m_Value(Op1))))
		      return nullptr;
		    if (match(Op0, m_LShr(m_Specific(Op1), m_SpecificInt(S))))
		      return Op1;
		    if (match(Op1, m_LShr(m_Specific(Op0), m_SpecificInt(S))))
		      return Op0;
		    return nullptr;
		  };

		  // Peel the or/shift steps from the outermost (shift by BW/2) down to the
		  // innermost (shift by 1).
		  unsigned N = BW;
		  while (N > 1) {
		    N /= 2;
		    V = matchOrShift(V, N);
		    if (!V)
		      return nullptr;
		  }

		  // The value of BW is the one that determines the type of ctlz's argument.
		  if (BW > Ty->getBitWidth()) {
		    IntegerType *ATy = IntegerType::get(BI->getContext(), BW);
		    V = Builder->CreateZExt(V, ATy);
		  }
		  Module *M = Builder->GetInsertBlock()->getParent()->getParent();
		  Value *Func = Intrinsic::getDeclaration(M, Intrinsic::ctlz, {V->getType()});
		  // The second ctlz operand is passed as false, as in the original code.
		  Value *False = ConstantInt::getFalse(BI->getContext());
		  CallInst *CI = Builder->CreateCall(Func, {V, False});
		  CI->setDebugLoc(BI->getDebugLoc());
		  // The sub being replaced may be wider than the value that was counted.
		  if (BI->getType() != CI->getType())
		    return Builder->CreateZExt(CI, BI->getType());
		  return CI;
		}

Instruction *InstCombiner::visitSub(BinaryOperator &I) {		Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);		Value Op0 = I.getOperand(0), Op1 = I.getOperand(1);

if (Value *V = SimplifyVectorOp(I))		if (Value *V = SimplifyVectorOp(I))
return ReplaceInstUsesWith(I, V);		return ReplaceInstUsesWith(I, V);

if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),		if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), DL, TLI, DT, AC))		I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
return ReplaceInstUsesWith(I, V);		return ReplaceInstUsesWith(I, V);

// (A*B)-(A*C) -> A*(B-C) etc		// (A*B)-(A*C) -> A*(B-C) etc
▲ Show 20 Lines • Show All 167 Lines • ▼ Show 20 Lines	if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);		return ReplaceInstUsesWith(I, Res);

// trunc(p)-trunc(q) -> trunc(p-q)		// trunc(p)-trunc(q) -> trunc(p-q)
if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&		if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) &&
match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))		match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp)))))
if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))		if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType()))
return ReplaceInstUsesWith(I, Res);		return ReplaceInstUsesWith(I, Res);

		if (Value *V = optimizeToCtlz(&I, Builder))
		return ReplaceInstUsesWith(I, V);

bool Changed = false;		bool Changed = false;
if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {		if (!I.hasNoSignedWrap() && WillNotOverflowSignedSub(Op0, Op1, I)) {
Changed = true;		Changed = true;
I.setHasNoSignedWrap(true);		I.setHasNoSignedWrap(true);
}		}
if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {		if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedSub(Op0, Op1, I)) {
Changed = true;		Changed = true;
I.setHasNoUnsignedWrap(true);		I.setHasNoUnsignedWrap(true);
▲ Show 20 Lines • Show All 58 Lines • Show Last 20 Lines

test/Transforms/InstCombine/ctlz-match.ll

This file was added.

				; RUN: opt -instcombine -S < %s | FileCheck %s

				; unsigned ctlz16(unsigned short t0) {
				; t0 = t0 | (t0>>1);
				; t0 = t0 | (t0>>2);
				; t0 = t0 | (t0>>4);
				; t0 = t0 | (t0>>8);
				; unsigned t1 = (t0 & 0x5555) + ((t0>>1) & 0x5555);
				; unsigned t2 = (t1 & 0x3333) + ((t1>>2) & 0x3333);
				; unsigned t3 = (t2 & 0x0F0F) + ((t2>>4) & 0x0F0F);
				; unsigned t4 = (t3 & 0x00FF) + ((t3>>8) & 0x00FF);
				; return 16-t4;
				; }
				;
				; CHECK: define i32 @ctlz16
				; CHECK: [[V0:%[a-zA-Z0-9_]+]] = call i16 @llvm.ctlz.i16(i16 %t0, i1 false)
				; CHECK: zext i16 [[V0]] to i32
				; Input for the 16-bit case. Every step is carried out in i32 on a
				; zero-extended value, with the or results truncated back to i16.
				define i32 @ctlz16(i16 zeroext %t0) #0 {
				entry:
				; Or/shift chain with shift amounts 1, 2, 4 and 8.
				%conv = zext i16 %t0 to i32
				%shr = ashr i32 %conv, 1
				%or = or i32 %conv, %shr
				%conv2 = trunc i32 %or to i16
				%conv3 = zext i16 %conv2 to i32
				%shr5 = ashr i32 %conv3, 2
				%or6 = or i32 %conv3, %shr5
				%conv7 = trunc i32 %or6 to i16
				%conv8 = zext i16 %conv7 to i32
				%shr10 = ashr i32 %conv8, 4
				%or11 = or i32 %conv8, %shr10
				%conv12 = trunc i32 %or11 to i16
				%conv13 = zext i16 %conv12 to i32
				%shr15 = ashr i32 %conv13, 8
				%or16 = or i32 %conv13, %shr15
				%conv17 = trunc i32 %or16 to i16
				%conv18 = zext i16 %conv17 to i32
				; Mask/shift/add population count with masks 0x5555, 0x3333, 0x0F0F, 0x00FF.
				%and = and i32 %conv18, 21845
				%shr20 = ashr i32 %conv18, 1
				%and21 = and i32 %shr20, 21845
				%add = add nsw i32 %and, %and21
				%and22 = and i32 %add, 13107
				%shr23 = lshr i32 %add, 2
				%and24 = and i32 %shr23, 13107
				%add25 = add i32 %and22, %and24
				%and26 = and i32 %add25, 3855
				%shr27 = lshr i32 %add25, 4
				%and28 = and i32 %shr27, 3855
				%add29 = add i32 %and26, %and28
				%and30 = and i32 %add29, 255
				%shr31 = lshr i32 %add29, 8
				%and32 = and i32 %shr31, 255
				%add33 = add i32 %and30, %and32
				; 16 minus the population count.
				%sub = sub i32 16, %add33
				ret i32 %sub
				}


				; unsigned ctlz32(unsigned t0) {
				; t0 = t0 | (t0>>1);
				; t0 = t0 | (t0>>2);
				; t0 = t0 | (t0>>4);
				; t0 = t0 | (t0>>8);
				; t0 = t0 | (t0>>16);
				; unsigned t1 = (t0 & 0x55555555) + ((t0>>1) & 0x55555555);
				; unsigned t2 = (t1 & 0x33333333) + ((t1>>2) & 0x33333333);
				; unsigned t3 = (t2 & 0x0F0F0F0F) + ((t2>>4) & 0x0F0F0F0F);
				; unsigned t4 = (t3 & 0x00FF00FF) + ((t3>>8) & 0x00FF00FF);
				; unsigned t5 = (t4 & 0x0000FFFF) + ((t4>>16) & 0x0000FFFF);
				; return 32-t5;
				; }
				;
				; CHECK: define i32 @ctlz32
				; CHECK: @llvm.ctlz.i32(i32 %t0, i1 false)
				; Input for the 32-bit case; all operations are on i32 directly.
				define i32 @ctlz32(i32 %t0) #0 {
				entry:
				; Or/shift chain with shift amounts 1, 2, 4, 8 and 16.
				%shr = lshr i32 %t0, 1
				%or = or i32 %t0, %shr
				%shr1 = lshr i32 %or, 2
				%or2 = or i32 %or, %shr1
				%shr3 = lshr i32 %or2, 4
				%or4 = or i32 %or2, %shr3
				%shr5 = lshr i32 %or4, 8
				%or6 = or i32 %or4, %shr5
				%shr7 = lshr i32 %or6, 16
				%or8 = or i32 %or6, %shr7
				; Mask/shift/add population count of %or8 (five steps).
				%and = and i32 %or8, 1431655765
				%shr9 = lshr i32 %or8, 1
				%and10 = and i32 %shr9, 1431655765
				%add = add i32 %and, %and10
				%and11 = and i32 %add, 858993459
				%shr12 = lshr i32 %add, 2
				%and13 = and i32 %shr12, 858993459
				%add14 = add i32 %and11, %and13
				%and15 = and i32 %add14, 252645135
				%shr16 = lshr i32 %add14, 4
				%and17 = and i32 %shr16, 252645135
				%add18 = add i32 %and15, %and17
				%and19 = and i32 %add18, 16711935
				%shr20 = lshr i32 %add18, 8
				%and21 = and i32 %shr20, 16711935
				%add22 = add i32 %and19, %and21
				%and23 = and i32 %add22, 65535
				%shr24 = lshr i32 %add22, 16
				%and25 = and i32 %shr24, 65535
				%add26 = add i32 %and23, %and25
				; 32 minus the population count.
				%sub = sub i32 32, %add26
				ret i32 %sub
				}


				; typedef unsigned long long u64_t;
				; u64_t ctlz64(u64_t t0) {
				; t0 = t0 | (t0>>1);
				; t0 = t0 | (t0>>2);
				; t0 = t0 | (t0>>4);
				; t0 = t0 | (t0>>8);
				; t0 = t0 | (t0>>16);
				; t0 = t0 | (t0>>32);
				; u64_t t1 = (t0 & 0x5555555555555555LL) + ((t0>>1) & 0x5555555555555555LL);
				; u64_t t2 = (t1 & 0x3333333333333333LL) + ((t1>>2) & 0x3333333333333333LL);
				; u64_t t3 = (t2 & 0x0F0F0F0F0F0F0F0FLL) + ((t2>>4) & 0x0F0F0F0F0F0F0F0FLL);
				; u64_t t4 = (t3 & 0x00FF00FF00FF00FFLL) + ((t3>>8) & 0x00FF00FF00FF00FFLL);
				; u64_t t5 = (t4 & 0x0000FFFF0000FFFFLL) + ((t4>>16) & 0x0000FFFF0000FFFFLL);
				; u64_t t6 = (t5 & 0x00000000FFFFFFFFLL) + ((t5>>32) & 0x00000000FFFFFFFFLL);
				; return 64-t6;
				; }
				;
				; CHECK: define i64 @ctlz64
				; CHECK: @llvm.ctlz.i64(i64 %t0, i1 false)
				; Input for the 64-bit case; all operations are on i64 directly.
				define i64 @ctlz64(i64 %t0) #0 {
				entry:
				; Or/shift chain with shift amounts 1, 2, 4, 8, 16 and 32.
				%shr = lshr i64 %t0, 1
				%or = or i64 %t0, %shr
				%shr1 = lshr i64 %or, 2
				%or2 = or i64 %or, %shr1
				%shr3 = lshr i64 %or2, 4
				%or4 = or i64 %or2, %shr3
				%shr5 = lshr i64 %or4, 8
				%or6 = or i64 %or4, %shr5
				%shr7 = lshr i64 %or6, 16
				%or8 = or i64 %or6, %shr7
				%shr9 = lshr i64 %or8, 32
				%or10 = or i64 %or8, %shr9
				; Mask/shift/add population count of %or10 (six steps).
				%and = and i64 %or10, 6148914691236517205
				%shr11 = lshr i64 %or10, 1
				%and12 = and i64 %shr11, 6148914691236517205
				%add = add i64 %and, %and12
				%and13 = and i64 %add, 3689348814741910323
				%shr14 = lshr i64 %add, 2
				%and15 = and i64 %shr14, 3689348814741910323
				%add16 = add i64 %and13, %and15
				%and17 = and i64 %add16, 1085102592571150095
				%shr18 = lshr i64 %add16, 4
				%and19 = and i64 %shr18, 1085102592571150095
				%add20 = add i64 %and17, %and19
				%and21 = and i64 %add20, 71777214294589695
				%shr22 = lshr i64 %add20, 8
				%and23 = and i64 %shr22, 71777214294589695
				%add24 = add i64 %and21, %and23
				%and25 = and i64 %add24, 281470681808895
				%shr26 = lshr i64 %add24, 16
				%and27 = and i64 %shr26, 281470681808895
				%add28 = add i64 %and25, %and27
				%and29 = and i64 %add28, 4294967295
				%shr30 = lshr i64 %add28, 32
				%and31 = and i64 %shr30, 4294967295
				%add32 = add i64 %and29, %and31
				; 64 minus the population count.
				%sub = sub i64 64, %add32
				ret i64 %sub
				}

				attributes #0 = { nounwind }

test/Transforms/InstCombine/ctpop-match.ll

This file was added.

				; RUN: opt -instcombine -S < %s | FileCheck %s

				; unsigned pop8(unsigned char t0) {
				; unsigned t1 = (t0 & 0x55) + ((t0>>1) & 0x55);
				; unsigned t2 = (t1 & 0x33) + ((t1>>2) & 0x33);
				; unsigned t3 = (t2 & 0x0F) + ((t2>>4) & 0x0F);
				; return t3;
				; }
				;
				; CHECK: define i32 @pop8
				; CHECK: [[ARG8:%[a-zA-Z0-9_]+]] = zext i8 %t0 to i32
				; CHECK: @llvm.ctpop.i32(i32 [[ARG8]])
				; Input for the 8-bit case: the i8 argument is zero-extended and the count
				; is computed in i32 with masks 0x55, 0x33 and 0x0F.
				define i32 @pop8(i8 zeroext %t0) #0 {
				entry:
				%conv = zext i8 %t0 to i32
				; Step 1: shift by 1, mask 0x55.
				%and = and i32 %conv, 85
				%shr = ashr i32 %conv, 1
				%and2 = and i32 %shr, 85
				%add = add nsw i32 %and, %and2
				; Step 2: shift by 2, mask 0x33.
				%and3 = and i32 %add, 51
				%shr4 = lshr i32 %add, 2
				%and5 = and i32 %shr4, 51
				%add6 = add i32 %and3, %and5
				; Step 3: shift by 4, mask 0x0F.
				%and7 = and i32 %add6, 15
				%shr8 = lshr i32 %add6, 4
				%and9 = and i32 %shr8, 15
				%add10 = add i32 %and7, %and9
				ret i32 %add10
				}


				; unsigned pop16(unsigned short t0) {
				; unsigned t1 = (t0 & 0x5555) + ((t0>>1) & 0x5555);
				; unsigned t2 = (t1 & 0x3333) + ((t1>>2) & 0x3333);
				; unsigned t3 = (t2 & 0x0F0F) + ((t2>>4) & 0x0F0F);
				; unsigned t4 = (t3 & 0x00FF) + ((t3>>8) & 0x00FF);
				; return t4;
				; }
				;
				; CHECK: define i32 @pop16
				; CHECK: [[ARG16:%[a-zA-Z0-9_]+]] = zext i16 %t0 to i32
				; CHECK: @llvm.ctpop.i32(i32 [[ARG16]])
				; Input for the 16-bit case: the i16 argument is zero-extended and the
				; count is computed in i32 with masks 0x5555, 0x3333, 0x0F0F and 0x00FF.
				define i32 @pop16(i16 zeroext %t0) #0 {
				entry:
				%conv = zext i16 %t0 to i32
				; Step 1: shift by 1.
				%and = and i32 %conv, 21845
				%shr = ashr i32 %conv, 1
				%and2 = and i32 %shr, 21845
				%add = add nsw i32 %and, %and2
				; Step 2: shift by 2.
				%and3 = and i32 %add, 13107
				%shr4 = lshr i32 %add, 2
				%and5 = and i32 %shr4, 13107
				%add6 = add i32 %and3, %and5
				; Step 3: shift by 4.
				%and7 = and i32 %add6, 3855
				%shr8 = lshr i32 %add6, 4
				%and9 = and i32 %shr8, 3855
				%add10 = add i32 %and7, %and9
				majnemerUnsubmitted Done Reply Inline Actions Please CHECK a little more thoroughly, it would be nice to see the call to ctpop consume the argument, etc. majnemer: Please CHECK a little more thoroughly, it would be nice to see the call to ctpop consume the…
				; Step 4: shift by 8.
				%and11 = and i32 %add10, 255
				%shr12 = lshr i32 %add10, 8
				%and13 = and i32 %shr12, 255
				%add14 = add i32 %and11, %and13
				ret i32 %add14
				}


				; unsigned pop32(unsigned t0) {
				; unsigned t1 = (t0 & 0x55555555) + ((t0>>1) & 0x55555555);
				; unsigned t2 = (t1 & 0x33333333) + ((t1>>2) & 0x33333333);
				; unsigned t3 = (t2 & 0x0F0F0F0F) + ((t2>>4) & 0x0F0F0F0F);
				; unsigned t4 = (t3 & 0x00FF00FF) + ((t3>>8) & 0x00FF00FF);
				; unsigned t5 = (t4 & 0x0000FFFF) + ((t4>>16) & 0x0000FFFF);
				; return t5;
				; }
				;
				; CHECK: define i32 @pop32
				; CHECK: @llvm.ctpop.i32(i32 %t0)
				; Input for the 32-bit case: five mask/shift/add steps on i32 with shift
				; amounts 1, 2, 4, 8 and 16.
				define i32 @pop32(i32 %t0) #0 {
				entry:
				; Step 1: shift by 1, mask 0x55555555.
				%and = and i32 %t0, 1431655765
				%shr = lshr i32 %t0, 1
				%and1 = and i32 %shr, 1431655765
				%add = add i32 %and, %and1
				; Step 2: shift by 2, mask 0x33333333.
				%and2 = and i32 %add, 858993459
				%shr3 = lshr i32 %add, 2
				%and4 = and i32 %shr3, 858993459
				%add5 = add i32 %and2, %and4
				; Step 3: shift by 4, mask 0x0F0F0F0F.
				%and6 = and i32 %add5, 252645135
				%shr7 = lshr i32 %add5, 4
				%and8 = and i32 %shr7, 252645135
				%add9 = add i32 %and6, %and8
				; Step 4: shift by 8, mask 0x00FF00FF.
				%and10 = and i32 %add9, 16711935
				%shr11 = lshr i32 %add9, 8
				%and12 = and i32 %shr11, 16711935
				%add13 = add i32 %and10, %and12
				; Step 5: shift by 16, mask 0x0000FFFF.
				%and14 = and i32 %add13, 65535
				%shr15 = lshr i32 %add13, 16
				%and16 = and i32 %shr15, 65535
				majnemerUnsubmitted Done Reply Inline Actions This test case doesn't look reduced. majnemer: This test case doesn't look reduced.
				%add17 = add i32 %and14, %and16
				ret i32 %add17
				}


				; typedef unsigned long long u64_t;
				; u64_t pop64(u64_t t0) {
				; u64_t t1 = (t0 & 0x5555555555555555LL) + ((t0>>1) & 0x5555555555555555LL);
				; u64_t t2 = (t1 & 0x3333333333333333LL) + ((t1>>2) & 0x3333333333333333LL);
				; u64_t t3 = (t2 & 0x0F0F0F0F0F0F0F0FLL) + ((t2>>4) & 0x0F0F0F0F0F0F0F0FLL);
				; u64_t t4 = (t3 & 0x00FF00FF00FF00FFLL) + ((t3>>8) & 0x00FF00FF00FF00FFLL);
				; u64_t t5 = (t4 & 0x0000FFFF0000FFFFLL) + ((t4>>16) & 0x0000FFFF0000FFFFLL);
				; u64_t t6 = (t5 & 0x00000000FFFFFFFFLL) + ((t5>>32) & 0x00000000FFFFFFFFLL);
				; return t6;
				; }
				;
				; CHECK: define i64 @pop64
				; CHECK: @llvm.ctpop.i64(i64 %t0)
				; Input for the 64-bit case: six mask/shift/add steps on i64 with shift
				; amounts 1, 2, 4, 8, 16 and 32.
				define i64 @pop64(i64 %t0) #0 {
				entry:
				; Step 1: shift by 1.
				%and = and i64 %t0, 6148914691236517205
				%shr = lshr i64 %t0, 1
				%and1 = and i64 %shr, 6148914691236517205
				%add = add i64 %and, %and1
				; Step 2: shift by 2.
				%and2 = and i64 %add, 3689348814741910323
				%shr3 = lshr i64 %add, 2
				%and4 = and i64 %shr3, 3689348814741910323
				%add5 = add i64 %and2, %and4
				; Step 3: shift by 4.
				%and6 = and i64 %add5, 1085102592571150095
				%shr7 = lshr i64 %add5, 4
				%and8 = and i64 %shr7, 1085102592571150095
				%add9 = add i64 %and6, %and8
				; Step 4: shift by 8.
				%and10 = and i64 %add9, 71777214294589695
				%shr11 = lshr i64 %add9, 8
				%and12 = and i64 %shr11, 71777214294589695
				%add13 = add i64 %and10, %and12
				; Step 5: shift by 16.
				%and14 = and i64 %add13, 281470681808895
				%shr15 = lshr i64 %add13, 16
				%and16 = and i64 %shr15, 281470681808895
				%add17 = add i64 %and14, %and16
				; Step 6: shift by 32.
				%and18 = and i64 %add17, 4294967295
				%shr19 = lshr i64 %add17, 32
				%and20 = and i64 %shr19, 4294967295
				%add21 = add i64 %and18, %and20
				ret i64 %add21
				}

				attributes #0 = { nounwind }

This is an archive of the discontinued LLVM Phabricator instance.

Recognize pattern for ctpop in instcombine
AbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43461

lib/Transforms/InstCombine/InstCombineAddSub.cpp

test/Transforms/InstCombine/ctlz-match.ll

test/Transforms/InstCombine/ctpop-match.ll

This is an archive of the discontinued LLVM Phabricator instance.

Recognize pattern for ctpop in instcombineAbandonedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43461

lib/Transforms/InstCombine/InstCombineAddSub.cpp

test/Transforms/InstCombine/ctlz-match.ll

test/Transforms/InstCombine/ctpop-match.ll

Recognize pattern for ctpop in instcombine
AbandonedPublic