Diff 43894

lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Show First 20 Lines • Show All 1,587 Lines • ▼ Show 20 Lines
/// OverallLeftShift value would be -16 on entry. This is used to specify which		/// OverallLeftShift value would be -16 on entry. This is used to specify which
/// bits of BitValues are actually being set.		/// bits of BitValues are actually being set.
///		///
/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding		/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding
/// bit is masked to zero by a user. For example, in (X & 255), X will be		/// bit is masked to zero by a user. For example, in (X & 255), X will be
/// processed with a bytemask of 255. BitMask is always in the local		/// processed with a bytemask of 255. BitMask is always in the local
/// (OverallLeftShift) coordinate space.		/// (OverallLeftShift) coordinate space.
///		///
		/// It is possible to create an IR pattern that results in exponential execution
		/// time here, by making a tree where every OR refers to one other OR via both
		/// operands:
		///
		/// for (i = 0; i < N; ++i)
		/// b |= b >> 1; // Causes 2^N executions of CollectBitParts!
		///
		/// To defend against this (and because ORs are the only nodes that cause us to
		/// fork control flow) we keep a counter alive between all invocations of
		/// CollectBitParts, expected to be initialized to bitwidth(b).
static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,		static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask,
SmallVectorImpl<Value *> &BitValues,		SmallVectorImpl<Value *> &BitValues,
SmallVectorImpl<int> &BitProvenance,		SmallVectorImpl<int> &BitProvenance,
SmallPtrSetImpl<Value*> &OrValues) {		SmallPtrSetImpl<Value*> &OrValues,
		unsigned &NumOrsRemaining) {
if (Instruction *I = dyn_cast<Instruction>(V)) {		if (Instruction *I = dyn_cast<Instruction>(V)) {
// If this is a bitreverse intrinsic, it can obviously be part of a		// If this is a bitreverse intrinsic, it can obviously be part of a
// bitreverse/bswap.		// bitreverse/bswap.
// FIXME: Do the same for Intrinsic::bswap!		// FIXME: Do the same for Intrinsic::bswap!
if (isa<IntrinsicInst>(I) &&		if (isa<IntrinsicInst>(I) &&
cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::bitreverse) {		cast<IntrinsicInst>(I)->getIntrinsicID() == Intrinsic::bitreverse) {
auto *V = I->getOperand(0);		auto *V = I->getOperand(0);
// We don't have to demand a contiguous range of bits from a bitreverse.		// We don't have to demand a contiguous range of bits from a bitreverse.
Show All 9 Lines	if (isa<IntrinsicInst>(I) &&
if (BitMask[I]) {		if (BitMask[I]) {
BitProvenance[DestBitNo + I] = BW - I - 1;		BitProvenance[DestBitNo + I] = BW - I - 1;
BitValues[DestBitNo + I] = V;		BitValues[DestBitNo + I] = V;
}		}
return false;		return false;
}		}

// If this is an or instruction, it may be an inner node of the bswap.		// If this is an or instruction, it may be an inner node of the bswap.
if (I->getOpcode() == Instruction::Or)		if (I->getOpcode() == Instruction::Or) {
		if (NumOrsRemaining-- == 0)
		return true;
return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,		return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
BitValues, BitProvenance, OrValues) \|\|		BitValues, BitProvenance, OrValues,
		NumOrsRemaining) \|\|
CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,		CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask,
BitValues, BitProvenance, OrValues);		BitValues, BitProvenance, OrValues,
		NumOrsRemaining);
		}

// If this is a logical shift by a constant, recurse with OverallLeftShift		// If this is a logical shift by a constant, recurse with OverallLeftShift
// and BitMask adjusted.		// and BitMask adjusted.
if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {		if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
unsigned ShAmt =		unsigned ShAmt =
cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);		cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U);
// Ensure the shift amount is defined.		// Ensure the shift amount is defined.
if (ShAmt > BitValues.size())		if (ShAmt > BitValues.size())
Show All 11 Lines	if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) {
}		}

if (OverallLeftShift >= (int)BitValues.size())		if (OverallLeftShift >= (int)BitValues.size())
return true;		return true;
if (OverallLeftShift <= -(int)BitValues.size())		if (OverallLeftShift <= -(int)BitValues.size())
return true;		return true;

return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,		return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
BitValues, BitProvenance, OrValues);		BitValues, BitProvenance, OrValues,
		NumOrsRemaining);
}		}

// If this is a logical 'and' with a mask that clears bits, clear the		// If this is a logical 'and' with a mask that clears bits, clear the
// corresponding bits in BitMask.		// corresponding bits in BitMask.
if (I->getOpcode() == Instruction::And &&		if (I->getOpcode() == Instruction::And &&
isa<ConstantInt>(I->getOperand(1))) {		isa<ConstantInt>(I->getOperand(1))) {
unsigned NumBits = BitValues.size();		unsigned NumBits = BitValues.size();
APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);		APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1);
Show All 11 Lines	if (I->getOpcode() == Instruction::And &&
BitMask.clearBit(i);		BitMask.clearBit(i);
continue;		continue;
}		}

// Otherwise, this bit is kept.		// Otherwise, this bit is kept.
}		}

return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,		return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask,
BitValues, BitProvenance, OrValues);		BitValues, BitProvenance, OrValues,
		NumOrsRemaining);
}		}
}		}

// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be		// Okay, we got to something that isn't a shift, 'or' or 'and'. This must be
// the input value to the bswap/bitreverse. To be part of a bswap or		// the input value to the bswap/bitreverse. To be part of a bswap or
// bitreverse we must be demanding a contiguous range of bits from it.		// bitreverse we must be demanding a contiguous range of bits from it.
unsigned InputBitLen = BitMask.countPopulation();		unsigned InputBitLen = BitMask.countPopulation();
unsigned InputBitNo = BitMask.countTrailingZeros();		unsigned InputBitNo = BitMask.countTrailingZeros();
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines	Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) {
/// We keep track of which bit (BitProvenance) inside which value (BitValues)		/// We keep track of which bit (BitProvenance) inside which value (BitValues)
/// defines each bit in the result.		/// defines each bit in the result.
SmallVector<Value *, 8> BitValues(BW, nullptr);		SmallVector<Value *, 8> BitValues(BW, nullptr);
SmallVector<int, 8> BitProvenance(BW, -1);		SmallVector<int, 8> BitProvenance(BW, -1);

// Try to find all the pieces corresponding to the bswap.		// Try to find all the pieces corresponding to the bswap.
APInt BitMask = APInt::getAllOnesValue(BitValues.size());		APInt BitMask = APInt::getAllOnesValue(BitValues.size());
SmallPtrSet<Value*, 2> OrValues;		SmallPtrSet<Value*, 2> OrValues;
if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance, OrValues))		unsigned NumOrsRemaining = BitMask.getBitWidth();
		if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance, OrValues,
		NumOrsRemaining))
		majnemer (inline comment, not done): This seems a bit much if you have i128. What is NumOrsRemaining typically when this optimization kicks in?
return nullptr;		return nullptr;

// Check to see if all of the bits come from the same value.		// Check to see if all of the bits come from the same value.
Value *V = nullptr;		Value *V = nullptr;
for (unsigned I = 0, E = BitValues.size(); I != E; ++I) {		for (unsigned I = 0, E = BitValues.size(); I != E; ++I) {
if (BitValues[I] && !V)		if (BitValues[I] && !V)
V = BitValues[I];		V = BitValues[I];
else if (BitProvenance[I] != -1 && BitValues[I] != V)		else if (BitProvenance[I] != -1 && BitValues[I] != V)
▲ Show 20 Lines • Show All 1,232 Lines • Show Last 20 Lines

test/Transforms/InstCombine/bitreverse-hang.ll

This file was added.

				; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s

				; This test is a worst-case scenario for bitreversal/byteswap detection.
				; After loop unrolling (the unrolled loop is unreadably large so it has been kept
				; rolled here), we have a binary tree of OR operands (as bitreversal detection
				; looks straight through shifts):
				;
				; OR
				; | \
				; | LSHR
				; | /
				; OR
				; | \
				; | LSHR
				; | /
				; OR
				;
				; This results in exponential runtime. The loop here is 32 iterations which will
				; totally hang if we don't deal with this case cleverly.

				@b = common global i32 0, align 4

				; CHECK: define i32 @fn1
				; Loads @b, then loops 32 times OR-ing the running value with itself shifted
				; right by one bit, and finally stores the result back to @b. The loop is kept
				; rolled in this file.
				define i32 @fn1() #0 {
				entry:
				%b.promoted = load i32, i32* @b, align 4, !tbaa !2
				br label %for.body

				for.body: ; preds = %for.body, %entry
				; %or4 is the value carried across iterations (initially the load of @b);
				; %i.03 is the iteration counter, starting at 0.
				%or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ]
				%i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
				; Both operands of %or come from %or4: one directly, one through the lshr.
				%shr = lshr i32 %or4, 1
				%or = or i32 %shr, %or4
				%inc = add nuw nsw i32 %i.03, 1
				; Leave the loop once the incremented counter reaches 32.
				%exitcond = icmp eq i32 %inc, 32
				br i1 %exitcond, label %for.end, label %for.body

				for.end: ; preds = %for.body
				; Write the final OR result back to the global.
				store i32 %or, i32* @b, align 4, !tbaa !2
				ret i32 undef
				}

				attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" }

				!llvm.module.flags = !{!0}
				!llvm.ident = !{!1}

				!0 = !{i32 1, !"PIC Level", i32 2}
				!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"}
				!2 = !{!3, !3, i64 0}
				!3 = !{!"int", !4, i64 0}
				!4 = !{!"omnipotent char", !5, i64 0}
				!5 = !{!"Simple C/C++ TBAA"}

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Defend against worst-case exponential execution time
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43894

lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

test/Transforms/InstCombine/bitreverse-hang.ll

This is an archive of the discontinued LLVM Phabricator instance.

[InstCombine] Defend against worst-case exponential execution timeClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 43894

lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

test/Transforms/InstCombine/bitreverse-hang.ll

[InstCombine] Defend against worst-case exponential execution time
ClosedPublic