Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1593,10 +1593,21 @@ /// processed with a bytemask of 255. BitMask is always in the local /// (OverallLeftShift) coordinate space. /// +/// It is possible to create an IR pattern that results in exponential execution +/// time here, by making a tree where every OR refers to one other OR via *both* +/// operands: +/// +/// for (i = 0; i < N; ++i) +/// b |= b >> 1; // Causes 2^N executions of CollectBitParts! +/// +/// To defend against this (and because ORs are the only node that cause us to +/// fork control flow) we keep a counter alive between all invocations of +/// CollectBitParts, expected to be initialized to bitwidth(b). static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask, SmallVectorImpl &BitValues, SmallVectorImpl &BitProvenance, - SmallPtrSetImpl &OrValues) { + SmallPtrSetImpl &OrValues, + unsigned &NumOrsRemaining) { if (Instruction *I = dyn_cast(V)) { // If this is a bitreverse intrinsic, it can obviously be part of a // bitreverse/bswap. @@ -1622,11 +1633,16 @@ } // If this is an or instruction, it may be an inner node of the bswap. - if (I->getOpcode() == Instruction::Or) + if (I->getOpcode() == Instruction::Or) { + if (NumOrsRemaining-- == 0) + return true; return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance, OrValues) || + BitValues, BitProvenance, OrValues, + NumOrsRemaining) || CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask, - BitValues, BitProvenance, OrValues); + BitValues, BitProvenance, OrValues, + NumOrsRemaining); + } // If this is a logical shift by a constant, recurse with OverallLeftShift // and BitMask adjusted. @@ -1654,7 +1670,8 @@ return true; return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance, OrValues); + BitValues, BitProvenance, OrValues, + NumOrsRemaining); } // If this is a logical 'and' with a mask that clears bits, clear the @@ -1682,7 +1699,8 @@ } return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance, OrValues); + BitValues, BitProvenance, OrValues, + NumOrsRemaining); } } @@ -1754,7 +1772,9 @@ // Try to find all the pieces corresponding to the bswap. APInt BitMask = APInt::getAllOnesValue(BitValues.size()); SmallPtrSet OrValues; - if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance, OrValues)) + unsigned NumOrsRemaining = BitMask.getBitWidth(); + if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance, OrValues, + NumOrsRemaining)) return nullptr; // Check to see if all of the bits come from the same value. Index: test/Transforms/InstCombine/bitreverse-hang.ll =================================================================== --- /dev/null +++ test/Transforms/InstCombine/bitreverse-hang.ll @@ -0,0 +1,53 @@ +; RUN: opt < %s -loop-unroll -instcombine -S | FileCheck %s + +; This test is a worst-case scenario for bitreversal/byteswap detection. +; After loop unrolling (the unrolled loop is unreadably large so it has been kept +; rolled here), we have a binary tree of OR operands (as bitreversal detection +; looks straight through shifts): +; +; OR +; | \ +; | LSHR +; | / +; OR +; | \ +; | LSHR +; | / +; OR +; +; This results in exponential runtime. The loop here is 32 iterations which will +; totally hang if we don't deal with this case cleverly. + +@b = common global i32 0, align 4 + +; CHECK: define i32 @fn1 +define i32 @fn1() #0 { +entry: + %b.promoted = load i32, i32* @b, align 4, !tbaa !2 + br label %for.body + +for.body: ; preds = %for.body, %entry + %or4 = phi i32 [ %b.promoted, %entry ], [ %or, %for.body ] + %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %shr = lshr i32 %or4, 1 + %or = or i32 %shr, %or4 + %inc = add nuw nsw i32 %i.03, 1 + %exitcond = icmp eq i32 %inc, 32 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + store i32 %or, i32* @b, align 4, !tbaa !2 + ret i32 undef +} + +attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git eb70f4e9cc9a4dc3dd57b032fb858d56b4b64a0e)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"int", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"}