Index: include/llvm/Transforms/Scalar/Reassociate.h =================================================================== --- include/llvm/Transforms/Scalar/Reassociate.h +++ include/llvm/Transforms/Scalar/Reassociate.h @@ -72,6 +72,14 @@ DenseMap RankMap; DenseMap, unsigned> ValueRankMap; SetVector> RedoInsts; + + unsigned ReassociateStep; + // Arbitrary, but prevents quadratic behavior. + static const unsigned GlobalReassociateLimit = 10; + static const unsigned NumBinaryOps = + Instruction::BinaryOpsEnd - Instruction::BinaryOpsBegin; + DenseMap, unsigned> PairMap[NumBinaryOps]; + bool MadeChange; public: @@ -105,6 +113,7 @@ SetVector> &Insts); void OptimizeInst(Instruction *I); Instruction *canonicalizeNegConstExpr(Instruction *I); + void BuildPairMap(ReversePostOrderTraversal &RPOT); }; } // end namespace llvm Index: lib/Transforms/Scalar/Reassociate.cpp =================================================================== --- lib/Transforms/Scalar/Reassociate.cpp +++ lib/Transforms/Scalar/Reassociate.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/GlobalsModRef.h" @@ -2187,11 +2188,99 @@ return; } + if (ReassociateStep == 1 && Ops.size() > 2 && + Ops.size() <= GlobalReassociateLimit) { + unsigned Max = 1; + unsigned BestRank = 0; + std::pair BestPair; + unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin; + for (unsigned i = 0; i < Ops.size() - 1; ++i) + for (unsigned j = i + 1; j < Ops.size(); ++j) { + unsigned Score = 0; + Value *Op0 = Ops[i].Op; + Value *Op1 = Ops[j].Op; + if (std::less()(Op1, Op0)) + std::swap(Op0, Op1); + auto it = PairMap[Idx].find({Op0, Op1}); + if (it != PairMap[Idx].end()) + Score += it->second; + + unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank); + if (Score > Max || (Score == Max && MaxRank < BestRank)) { + BestPair = {i, j}; + Max = Score; + BestRank = MaxRank; + } + } + if (Max > 1) { + auto Op0 = Ops[BestPair.first]; + auto Op1 = Ops[BestPair.second]; + Ops.erase(&Ops[BestPair.second]); + Ops.erase(&Ops[BestPair.first]); + Ops.push_back(Op0); + Ops.push_back(Op1); + } + } // Now that we ordered and optimized the expressions, splat them back into // the expression tree, removing any unneeded nodes. RewriteExprTree(I, Ops); } +void +ReassociatePass::BuildPairMap(ReversePostOrderTraversal &RPOT) { + // Make a "pairmap" of how often each operand pair occurs. + for (BasicBlock *BI : RPOT) { + for (Instruction &I : *BI) { + if (!I.isAssociative()) + continue; + + // Ignore nodes that aren't at the root of trees. + if (I.hasOneUse() && I.user_back()->getOpcode() == I.getOpcode()) + continue; + + // Collect all operands in a single reassociable expression. + // Since Reassociate has already been run once, we can assume things + // are already canonical according to Reassociation's regime. + SmallVector Worklist = { I.getOperand(0), I.getOperand(1) }; + SmallVector Ops; + while (!Worklist.empty() && Ops.size() <= GlobalReassociateLimit) { + Value *Op = Worklist.pop_back_val(); + Instruction *OpI = dyn_cast(Op); + if (!OpI || OpI->getOpcode() != I.getOpcode() || !OpI->hasOneUse()) { + Ops.push_back(Op); + continue; + } + // be paranoid about self-referencing expressions in unreachable code + if (OpI->getOperand(0) != OpI) + Worklist.push_back(OpI->getOperand(0)); + if (OpI->getOperand(1) != OpI) + Worklist.push_back(OpI->getOperand(1)); + } + // Skip extremely long expressions. + if (Ops.size() > GlobalReassociateLimit) + continue; + + // Add all pairwise combinations of operands to the pair map. + unsigned BinaryIdx = I.getOpcode() - Instruction::BinaryOpsBegin; + SmallSet, 32> Visited; + for (unsigned i = 0; i < Ops.size() - 1; ++i) { + for (unsigned j = i + 1; j < Ops.size(); ++j) { + // Canonicalize operand orderings. + Value *Op0 = Ops[i]; + Value *Op1 = Ops[j]; + if (std::less()(Op1, Op0)) + std::swap(Op0, Op1); + if (!Visited.insert({Op0, Op1}).second) + continue; + auto res = PairMap[BinaryIdx].insert({{Op0, Op1}, 1}); + if (!res.second) + ++res.first->second; + } + } + } + } +} + PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) { // Get the functions basic blocks in Reverse Post Order. This order is used by // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic @@ -2204,43 +2293,49 @@ MadeChange = false; // Traverse the same blocks that was analysed by BuildRankMap. - for (BasicBlock *BI : RPOT) { - assert(RankMap.count(&*BI) && "BB should be ranked."); - // Optimize every instruction in the basic block. - for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) - if (isInstructionTriviallyDead(&*II)) { - EraseInst(&*II++); - } else { - OptimizeInst(&*II); - assert(II->getParent() == &*BI && "Moved to a different block!"); - ++II; + for (ReassociateStep = 0; ReassociateStep < 2; ++ReassociateStep) { + for (BasicBlock *BI : RPOT) { + assert(RankMap.count(&*BI) && "BB should be ranked."); + // Optimize every instruction in the basic block. + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;) + if (isInstructionTriviallyDead(&*II)) { + EraseInst(&*II++); + } else { + OptimizeInst(&*II); + assert(II->getParent() == &*BI && "Moved to a different block!"); + ++II; + } + + // Make a copy of all the instructions to be redone so we can remove dead + // instructions. + SetVector> ToRedo(RedoInsts); + // Iterate over all instructions to be reevaluated and remove trivially dead + // instructions. If any operand of the trivially dead instruction becomes + // dead mark it for deletion as well. Continue this process until all + // trivially dead instructions have been removed. + while (!ToRedo.empty()) { + Instruction *I = ToRedo.pop_back_val(); + if (isInstructionTriviallyDead(I)) { + RecursivelyEraseDeadInsts(I, ToRedo); + MadeChange = true; + } } - // Make a copy of all the instructions to be redone so we can remove dead - // instructions. - SetVector> ToRedo(RedoInsts); - // Iterate over all instructions to be reevaluated and remove trivially dead - // instructions. If any operand of the trivially dead instruction becomes - // dead mark it for deletion as well. Continue this process until all - // trivially dead instructions have been removed. - while (!ToRedo.empty()) { - Instruction *I = ToRedo.pop_back_val(); - if (isInstructionTriviallyDead(I)) { - RecursivelyEraseDeadInsts(I, ToRedo); - MadeChange = true; + // Now that we have removed dead instructions, we can reoptimize the + // remaining instructions. + while (!RedoInsts.empty()) { + Instruction *I = RedoInsts.pop_back_val(); + if (isInstructionTriviallyDead(I)) + EraseInst(I); + else + OptimizeInst(I); } } - - // Now that we have removed dead instructions, we can reoptimize the - // remaining instructions. - while (!RedoInsts.empty()) { - Instruction *I = RedoInsts.pop_back_val(); - if (isInstructionTriviallyDead(I)) - EraseInst(I); - else - OptimizeInst(I); - } + if (ReassociateStep == 0) + BuildPairMap(RPOT); } + for (auto &Entry : PairMap) + Entry.clear(); // We are done with the rank map. RankMap.clear(); Index: test/Transforms/Reassociate/basictest.ll =================================================================== --- test/Transforms/Reassociate/basictest.ll +++ test/Transforms/Reassociate/basictest.ll @@ -242,3 +242,18 @@ if.end: ; preds = %entry ret i64 0 } + +; CHECK-LABEL: @test17 +; CHECK: %[[A:.*]] = mul i32 %X4, %X3 +; CHECK-NEXT: %[[C:.*]] = mul i32 %[[A]], %X1 +; CHECK-NEXT: %[[D:.*]] = mul i32 %[[A]], %X2 +; CHECK-NEXT: %[[E:.*]] = xor i32 %[[C]], %[[D]] +; CHECK-NEXT: ret i32 %[[E]] +define i32 @test17(i32 %X1, i32 %X2, i32 %X3, i32 %X4) { + %A = mul i32 %X3, %X1 + %B = mul i32 %X3, %X2 + %C = mul i32 %A, %X4 + %D = mul i32 %B, %X4 + %E = xor i32 %C, %D + ret i32 %E +} Index: test/Transforms/Reassociate/canonicalize-neg-const.ll =================================================================== --- test/Transforms/Reassociate/canonicalize-neg-const.ll +++ test/Transforms/Reassociate/canonicalize-neg-const.ll @@ -169,10 +169,11 @@ define double @pr34078(double %A) { ; CHECK-LABEL: @pr34078( -; CHECK-NEXT: [[SUB:%.*]] = fsub fast double 1.000000e+00, %A +; CHECK-NEXT: [[A_NEG:%.*]] = fsub fast double -0.000000e+00, %A +; CHECK-NEXT: [[SUB:%.*]] = fadd fast double [[A_NEG]], 1.000000e+00 ; CHECK-NEXT: [[POW2:%.*]] = fmul double %A, %A ; CHECK-NEXT: [[MUL5_NEG:%.*]] = fmul fast double [[POW2]], -5.000000e-01 -; CHECK-NEXT: [[SUB1:%.*]] = fadd fast double [[MUL5_NEG]], [[SUB]] +; CHECK-NEXT: [[SUB1:%.*]] = fadd fast double [[SUB]], [[MUL5_NEG]] ; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast double [[SUB1]], 2.000000e+00 ; CHECK-NEXT: ret double [[FACTOR]] ; Index: test/Transforms/Reassociate/fast-ReassociateVector.ll =================================================================== --- test/Transforms/Reassociate/fast-ReassociateVector.ll +++ test/Transforms/Reassociate/fast-ReassociateVector.ll @@ -221,9 +221,8 @@ define <2 x double> @test9(<2 x double> %b, <2 x double> %a) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x double> zeroinitializer, [[A:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <2 x double> [[B:%.*]], -; CHECK-NEXT: ret <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = fadd fast <2 x double> [[B:%.*]], +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = fadd fast <2 x double> %a, %2 = fadd fast <2 x double> %b, %1 @@ -253,7 +252,6 @@ define <2 x float> @test10(<2 x float> %a, <2 x float> %b, <2 x float> %z) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[TMP1:%.*]] = fsub fast <2 x float> zeroinitializer, zeroinitializer ; CHECK-NEXT: [[E:%.*]] = fmul fast <2 x float> [[A:%.*]], ; CHECK-NEXT: [[F:%.*]] = fmul fast <2 x float> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x float> [[F]] Index: test/Transforms/Reassociate/fast-basictest.ll =================================================================== --- test/Transforms/Reassociate/fast-basictest.ll +++ test/Transforms/Reassociate/fast-basictest.ll @@ -176,7 +176,7 @@ define float @test8(float %X, float %Y, float %Z) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[A:%.*]] = fmul fast float %Y, %X +; CHECK-NEXT: [[A:%.*]] = fmul fast float %X, %Y ; CHECK-NEXT: [[C:%.*]] = fsub fast float %Z, [[A]] ; CHECK-NEXT: ret float [[C]] ; @@ -267,8 +267,8 @@ define float @test12(float %X) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float %X, -3.000000e+00 -; CHECK-NEXT: [[Z:%.*]] = fadd fast float [[FACTOR]], 6.000000e+00 +; CHECK-NEXT: [[FACTOR:%.*]] = fmul fast float %X, 3.000000e+00 +; CHECK-NEXT: [[Z:%.*]] = fsub fast float 6.000000e+00, [[FACTOR]] ; CHECK-NEXT: ret float [[Z]] ; %A = fsub fast float 1.000000e+00, %X Index: test/Transforms/Reassociate/mulfactor.ll =================================================================== --- test/Transforms/Reassociate/mulfactor.ll +++ test/Transforms/Reassociate/mulfactor.ll @@ -86,8 +86,8 @@ ; (x^5) * (y^3) * z define i32 @test6(i32 %x, i32 %y, i32 %z) { ; CHECK-LABEL: @test6( -; CHECK-NEXT: [[TMP1:%.*]] = mul i32 %x, %x -; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], %y +; CHECK-NEXT: [[TMP1:%.*]] = mul i32 %y, %x +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], %x ; CHECK-NEXT: [[F:%.*]] = mul i32 %y, %x ; CHECK-NEXT: [[G:%.*]] = mul i32 [[F]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[G]], [[TMP2]] Index: test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll =================================================================== --- test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll +++ test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll @@ -7,7 +7,6 @@ ; CHECK-NEXT: [[T2_NEG:%.*]] = fmul fast half %a, 0xH4500 ; CHECK-NEXT: [[REASS_MUL:%.*]] = fmul fast half %b, 0xH4500 ; CHECK-NEXT: [[T51:%.*]] = fsub fast half [[REASS_MUL]], [[T2_NEG]] -; CHECK-NEXT: [[T5:%.*]] = fadd fast half [[REASS_MUL]], [[T2_NEG]] ; CHECK-NEXT: ret half [[T51]] ; %t1 = fmul fast half %b, 0xH4200 ; 3*b