Index: llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h +++ llvm/trunk/include/llvm/Transforms/Scalar/Reassociate.h @@ -29,6 +29,7 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" +#include namespace llvm { @@ -69,9 +70,14 @@ /// Reassociate commutative expressions. class ReassociatePass : public PassInfoMixin { +public: + using OrderedSet = + SetVector, std::deque>>; + +protected: DenseMap RankMap; DenseMap, unsigned> ValueRankMap; - SetVector> RedoInsts; + OrderedSet RedoInsts; // Arbitrary, but prevents quadratic behavior. static const unsigned GlobalReassociateLimit = 10; @@ -108,8 +114,7 @@ SmallVectorImpl &Ops); Value *RemoveFactorFromExpression(Value *V, Value *Factor); void EraseInst(Instruction *I); - void RecursivelyEraseDeadInsts(Instruction *I, - SetVector> &Insts); + void RecursivelyEraseDeadInsts(Instruction *I, OrderedSet &Insts); void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); void BuildPairMap(ReversePostOrderTraversal &RPOT); Index: llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp +++ llvm/trunk/lib/Transforms/Scalar/Reassociate.cpp @@ -810,7 +810,7 @@ /// pushing the negates through adds. These will be revisited to see if /// additional opportunities have been exposed. static Value *NegateValue(Value *V, Instruction *BI, - SetVector> &ToRedo) { + ReassociatePass::OrderedSet &ToRedo) { if (auto *C = dyn_cast(V)) return C->getType()->isFPOrFPVectorTy() ? ConstantExpr::getFNeg(C) : ConstantExpr::getNeg(C); @@ -924,8 +924,8 @@ /// If we have (X-Y), and if either X is an add, or if this is only used by an /// add, transform this into (X+(0-Y)) to promote better reassociation. -static BinaryOperator * -BreakUpSubtract(Instruction *Sub, SetVector> &ToRedo) { +static BinaryOperator *BreakUpSubtract(Instruction *Sub, + ReassociatePass::OrderedSet &ToRedo) { // Convert a subtract into an add and a neg instruction. This allows sub // instructions to be commuted with other add instructions. // @@ -1871,8 +1871,8 @@ // Remove dead instructions and if any operands are trivially dead add them to // Insts so they will be removed as well. -void ReassociatePass::RecursivelyEraseDeadInsts( - Instruction *I, SetVector> &Insts) { +void ReassociatePass::RecursivelyEraseDeadInsts(Instruction *I, + OrderedSet &Insts) { assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!"); SmallVector Ops(I->op_begin(), I->op_end()); ValueRankMap.erase(I); @@ -2333,7 +2333,7 @@ // Make a copy of all the instructions to be redone so we can remove dead // instructions. - SetVector> ToRedo(RedoInsts); + OrderedSet ToRedo(RedoInsts); // Iterate over all instructions to be reevaluated and remove trivially dead // instructions. If any operand of the trivially dead instruction becomes // dead mark it for deletion as well. Continue this process until all @@ -2349,7 +2349,8 @@ // Now that we have removed dead instructions, we can reoptimize the // remaining instructions. while (!RedoInsts.empty()) { - Instruction *I = RedoInsts.pop_back_val(); + Instruction *I = RedoInsts.front(); + RedoInsts.erase(RedoInsts.begin()); if (isInstructionTriviallyDead(I)) EraseInst(I); else Index: llvm/trunk/test/Transforms/Reassociate/long-chains.ll =================================================================== --- llvm/trunk/test/Transforms/Reassociate/long-chains.ll +++ llvm/trunk/test/Transforms/Reassociate/long-chains.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -reassociate -stats -S 2>&1 | FileCheck %s +; REQUIRES: asserts + +define i8 @longchain(i8 %in1, i8 %in2, i8 %in3, i8 %in4, i8 %in5, i8 %in6, i8 %in7, i8 %in8, i8 %in9, i8 %in10, i8 %in11, i8 %in12, i8 %in13, i8 %in14, i8 %in15, i8 %in16, i8 %in17, i8 %in18, i8 %in19, i8 %in20) { + %tmp1 = add i8 %in1, %in2 + %tmp2 = add i8 %tmp1, %in3 + %tmp3 = add i8 %tmp2, %in4 + %tmp4 = add i8 %tmp3, %in3 + %tmp5 = add i8 %tmp4, %in4 + %tmp6 = add i8 %tmp5, %in5 + %tmp7 = add i8 %tmp6, %in6 + %tmp8 = add i8 %tmp7, %in7 + %tmp9 = add i8 %tmp8, %in8 + %tmp10 = add i8 %tmp9, %in9 + %tmp11 = add i8 %tmp10, %in10 + %tmp12 = add i8 %tmp11, %in11 + %tmp13 = add i8 %tmp12, %in12 + %tmp14 = add i8 %tmp13, %in13 + %tmp15 = add i8 %tmp14, %in14 + %tmp16 = add i8 %tmp15, %in15 + %tmp17 = add i8 %tmp16, %in16 + %tmp18 = add i8 %tmp17, %in17 + %tmp19 = add i8 %tmp18, %in18 + %tmp20 = add i8 %tmp19, %in19 + %tmp21 = add i8 %tmp20, %in20 + ret i8 %tmp20 +} + +; Check the number of instructions reassociated is in the tens not the hundreds. +; At the time of writing, the exact numbers were: +; Bad order: 220 reassociate - Number of insts reassociated +; Good order: 55 reassociate - Number of insts reassociated +; +; CHECK: {{^[1-9][0-9]}} reassociate - Number of insts reassociated + +; Additionally check that we made at least three changes. +; CHECK: {{^ *[3-9]}} reassociate - Number of multiplies factored