Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -376,6 +376,7 @@ void initializeVerifierLegacyPassPass(PassRegistry&); void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); +void initializeWeakReassociatePassPass(PassRegistry&); void initializeWholeProgramDevirtPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -254,6 +254,19 @@ //===----------------------------------------------------------------------===// // +// WeakReassociate - This pass reassociates commutative expressions in an order +// that is designed to promote better instruction combining. +// +// For example: (x << 6 | y >> 1) | (z << 6 | w >> 1) -> +// (x << 6 | z << 6) | (y >> 1 | w >> 1) +// +// This pass will not change the topography of the tree. That means, amongst the +// rest, that this pass never creates new instructions. +// +FunctionPass *createWeakReassociatePass(); + +//===----------------------------------------------------------------------===// +// // JumpThreading - Thread control through mult-pred/multi-succ blocks where some // preds always go to some succ. Thresholds other than minus one override the // internal BB duplication default threshold. Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -385,6 +385,7 @@ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. addInstructionCombiningPass(MPM); + MPM.add(createWeakReassociatePass()); // Reassociate expressions addExtensionsToPM(EP_Peephole, MPM); } Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -66,6 +66,7 @@ StraightLineStrengthReduce.cpp StructurizeCFG.cpp TailRecursionElimination.cpp + WeakReassociate.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -102,6 +102,7 @@ initializeLoopVersioningPassPass(Registry); initializeEntryExitInstrumenterPass(Registry); initializePostInlineEntryExitInstrumenterPass(Registry); + initializeWeakReassociatePassPass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { Index: lib/Transforms/Scalar/WeakReassociate.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/WeakReassociate.cpp @@ -0,0 +1,378 @@ +//===-------- WeakReassociate.cpp - Reassociate binary expressions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates commutative expressions in an order that is designed +// to promote better instruction combining. +// +// For example: (x << 6 | y >> 1) | (z << 6 | w >> 1) -> +// (x << 6 | z << 6) | (y >> 1 | w >> 1) +// +// In the implementation of this algorithm, all leafs are divided into leaf +// classes according to their opcodes and constants (if applicable), and iff +// there are exactly two leaf classes ("Blue" and "Red"), the tree will be +// reassociated such that one side will contain only Blue leaves and the other +// will contain only Red leaves. +// This pass will not change the topography of the tree. That means, amongst the +// rest, that this pass never creates new instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "weak-reassociate" + +namespace { +/// Reassociate commutative expressions. +class WeakReassociatePass : public FunctionPass { + /// Tries to optimizae an instruction by re-associating its operands. + /// This function will succeed only if: + /// 1. The instruction's type is not i1. + /// 2. The instruction is associative. + /// 3. The instruction is the root of its opcode tree. + /// If all of the above hold, a call to WeakReassociatePass::RewriteExprTree + /// will be made. + /// + /// \param I The instruction to optimize + /// + /// \returns true iff successfuly optimized \p I + bool OptimizeInst(Instruction *I); + /// Rewrite an expression tree for an associative instructions tree root + /// instruction. + /// This function tried to partition the leaves of this tree into two sets, a + /// blue set and a red set, according to their opcodes and used constants. If + /// it succeeds it will re-arrange the tree such that all of the blue operands + /// will be in one side of the tree, and all of the red operands will be in + /// the other side of the tree. + /// + /// \param I An associative instructions tree root instruction. + /// + /// \returns true iff rewriting of the expression tree succeeded. + bool RewriteExprTree(BinaryOperator *I); + /// Given an associative instructions tree root instruction, this function + /// will return all the nodes of the tree. It may fail if the depth of the + /// tree is too big or there are too many leaves. + /// + /// \param I An associative instructions tree root instruction. + /// \param[out] NonLeaves All of the inner nodes of the tree. + /// \param[out] Leaves All of the leaves of the tree. + /// + /// \return true iff successfully got all of the nodes of the tree. + bool getTreeNodes(BinaryOperator *I, + SmallVector, 8> &NonLeaves, + SmallVectorImpl &Leaves); + +public: + static char ID; // Pass identification, replacement for typeid + WeakReassociatePass() : FunctionPass(ID) { + initializeWeakReassociatePassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved(); + } +}; +} // namespace + +/// An associative instructions tree leaf class. A class of a leaf will +/// represent its new position in the tree after the optimization. +struct LeafClass { + Instruction::BinaryOps Opcode; + ConstantInt *Op1; +}; + +inline bool operator<(const LeafClass &LHS, const LeafClass &RHS) { + if (LHS.Opcode < RHS.Opcode) + return true; + else if (LHS.Opcode > RHS.Opcode) + return false; + return (LHS.Op1 < RHS.Op1); +} + +/// Given a leaf, this function tries to return its appropriate LeafClass. +/// +/// \param V An associative instructions tree leaf. +/// \param[out] OpClass \p V 's LeafClass. +/// +/// \return true iff successfuly found \p V 's LeafClass. +static bool getLeafClass(Value *V, LeafClass &OpClass) { + if (!V) + return false; + BinaryOperator *BO = dyn_cast(V); + if (!BO) + return false; + ConstantInt *Op1 = dyn_cast(BO->getOperand(1)); + if (!Op1) + return false; + OpClass.Opcode = BO->getOpcode(); + OpClass.Op1 = Op1; + return true; +} + +/// Computes and returns the number of non-leaves (inner nodes) required in an +/// associative instructions tree in order to support a given number of leaves. +/// +/// \param LeafNo The required number of leaves. +/// \param[out] DepthToNodesNum The number of required non-leaves (inner nodes) +/// by depth. i.e. DepthToNodesNum[i] will hold the number of required +/// non-leaves (inner nodes) at depth i. +static void +nonLeafNodesNeededForTree(unsigned LeafNo, + SmallVector &DepthToNodesNum) { + unsigned TotalRemainingNodes = LeafNo - 1; + for (unsigned i = 0; TotalRemainingNodes > 0; ++i) { + unsigned MaxNodesForDepth = 1 << i; + if (TotalRemainingNodes <= MaxNodesForDepth) + DepthToNodesNum.push_back(TotalRemainingNodes); + else + DepthToNodesNum.push_back(MaxNodesForDepth); + TotalRemainingNodes -= DepthToNodesNum[i]; + } +} + +/// Updates an associative instructions tree with new operands as leaves. +/// +/// \param TreeNonLeaves The non-leaves (inner nodes) of the associative +/// instructions tree. TreeNonLeaves[i] will hold the non-leaves (inner nodes) +/// at depth i. +/// \param Leaves The new leaves for the tree. +static void updateTree( + const SmallVector, 8> &TreeNonLeaves, + const SmallVector &Leaves) { + unsigned i = 0; // iterator over the leaves + for (unsigned Depth = 0; Depth < TreeNonLeaves.size(); ++Depth) { + const SmallVector &CurrDepthNodes = + TreeNonLeaves[Depth]; + const SmallVector *NextDepthNodes = + Depth + 1 < TreeNonLeaves.size() ? &TreeNonLeaves[Depth + 1] : nullptr; + unsigned j = 0; // iterator over NextDepthNodes + for (BinaryOperator *CurrDepthNode : CurrDepthNodes) { + for (unsigned k = 0; k < 2; k++) { + Value *NewOperand = nullptr; + if (NextDepthNodes != nullptr && j < NextDepthNodes->size()) { + // First set all the nodes from the next depth, leave the leaves for + // the end + NewOperand = (*NextDepthNodes)[j]; + ++j; + } else { + assert(i < Leaves.size()); + NewOperand = Leaves[i]; + ++i; + } + CurrDepthNode->setOperand(k, NewOperand); + } + } + } + assert(i == Leaves.size() && "Not all the leaves were inserted to the tree"); +} + +bool WeakReassociatePass::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + bool MadeChange = false; + for (Instruction &I : instructions(F)) { + if (!isInstructionTriviallyDead(&I)) + MadeChange |= OptimizeInst(&I); + } + return MadeChange; +} + +bool WeakReassociatePass::OptimizeInst(Instruction *I) { + if (I->getType()->isIntegerTy(1)) + return false; + if (!I->isAssociative()) + return false; + BinaryOperator *BO = cast(I); + + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + unsigned Opcode = BO->getOpcode(); + if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) + return false; + + SmallVector Ops; + std::stack S; + S.push(BO); + while (!S.empty()) { + BinaryOperator *CurrBO = S.top(); + S.pop(); + for (unsigned i = 0; i < 2; ++i) { + Value *Operand = CurrBO->getOperand(i); + BinaryOperator *OperandBO = dyn_cast(Operand); + if (!OperandBO || !OperandBO->hasOneUse() || + OperandBO->getOpcode() != Opcode) { + // This is a leaf + Ops.push_back(Operand); + if (Ops.size() >= 10) + return false; + } else + // not a leaf, needs to examine it too. + S.push(OperandBO); + } + } + if (Ops.size() <= 2) + return false; + return RewriteExprTree(BO); +} + +bool WeakReassociatePass::getTreeNodes( + BinaryOperator *I, + SmallVector, 8> &NonLeaves, + SmallVectorImpl &Leaves) { + unsigned Opcode = I->getOpcode(); + BasicBlock *BB = I->getParent(); + NonLeaves.push_back(SmallVector{I}); + for (unsigned Depth = 1;; ++Depth) { + if (Depth >= 4) + return false; + const SmallVector &PrevDepthNodes = + NonLeaves[Depth - 1]; + SmallVector *CurrDepthNodes = nullptr; + // The operands of PrevDepthNodes are either CurrDepthNodes or leaves. + for (BinaryOperator *PrevDepthNode : PrevDepthNodes) { + for (unsigned i = 0; i < 2; ++i) { + Value *Operand = PrevDepthNode->getOperand(i); + BinaryOperator *OperandBO = dyn_cast(Operand); + if (!OperandBO || !OperandBO->hasOneUse() || + OperandBO->getOpcode() != Opcode) { + // If the operand is not a BinaryOperator with the same opcode of the + // root and one use then it is a leaf. + Leaves.push_back(Operand); + if (Leaves.size() >= 10) + return false; + } else { + if (OperandBO->getParent() != BB) + // All non-leaves (inner nodes) must belong to the same basic block. + return false; + if (CurrDepthNodes == nullptr) { + NonLeaves.push_back(SmallVector()); + CurrDepthNodes = &NonLeaves[Depth]; + } + CurrDepthNodes->push_back(OperandBO); + } + } + } + if (CurrDepthNodes == nullptr) + // No new inner nodes, this was the last level of the tree. + break; + } + return true; +} + +bool WeakReassociatePass::RewriteExprTree(BinaryOperator *I) { + SmallVector, 8> NonLeaves; + SmallVector Leaves; + if (!getTreeNodes(I, NonLeaves, Leaves)) + return false; + + std::map> LeavesSets; + // Dividing the leaves into sets according to their LeafClass. + for (unsigned i = 0, e = Leaves.size(); i != e; ++i) { + LeafClass OpClass; + if (!getLeafClass(Leaves[i], OpClass)) + return false; + auto LeavesSetLocation = LeavesSets.find(OpClass); + if (LeavesSetLocation == LeavesSets.end()) { + if (LeavesSets.size() == 2) + return false; + std::tie(LeavesSetLocation, std::ignore) = + LeavesSets.insert(std::make_pair(OpClass, SmallVector())); + } + LeavesSetLocation->second.push_back(Leaves[i]); + } + // Must have exactly two sets, each with at least two elements. + if (std::find_if( + LeavesSets.begin(), LeavesSets.end(), + [](const std::pair> &KeyValPair) + -> bool { return KeyValPair.second.size() == 1; }) != + LeavesSets.end()) + return false; + if (LeavesSets.size() != 2) + return false; + const SmallVector &BlueLeaves = LeavesSets.begin()->second; + const SmallVector &RedLeaves = (++LeavesSets.begin())->second; + + SmallVector BlueDepthToNodesNum, RedDepthToNodesNum; + nonLeafNodesNeededForTree(BlueLeaves.size(), BlueDepthToNodesNum); + assert(BlueDepthToNodesNum.size() <= NonLeaves.size()); + nonLeafNodesNeededForTree(RedLeaves.size(), RedDepthToNodesNum); + assert(RedDepthToNodesNum.size() <= NonLeaves.size()); + + // NonLeaves[i] will hold the none leaves (inner nodes) at depth i of + // the tree. + SmallVector, 8> BlueNonLeaves, RedNonLeaves; + for (unsigned Depth = 1; Depth < NonLeaves.size(); ++Depth) { + // The two trees, being the immediate children of the tree, have + // depth of one less (e.g. their root is at depth 1 of the big tree). + unsigned ChildDepth = Depth - 1; + unsigned CurrDepthBlueNodesNum = ChildDepth < BlueDepthToNodesNum.size() + ? BlueDepthToNodesNum[ChildDepth] + : 0; + unsigned CurrDepthRedNodesNum = ChildDepth < RedDepthToNodesNum.size() + ? RedDepthToNodesNum[ChildDepth] + : 0; + SmallVector &CurrDepthNonLeaves = NonLeaves[Depth]; + if (CurrDepthBlueNodesNum + CurrDepthRedNodesNum > + CurrDepthNonLeaves.size()) + // If not enough nodes, stop with a failure. Note that this pass is not + // allowed to create new instructions. + return false; + + SmallVector CurrDepthBlueNodes, CurrDepthRedNodes; + // Splitting CurrDepthNonLeaves between CurrDepthBlueNodes and + // CurrDepthRedNodes. + unsigned i = 0; + for (unsigned j = 0; j < CurrDepthBlueNodesNum; ++j) { + CurrDepthBlueNodes.push_back(CurrDepthNonLeaves[i]); + ++i; + } + if (!CurrDepthBlueNodes.empty()) + BlueNonLeaves.push_back(CurrDepthBlueNodes); + for (unsigned j = 0; j < CurrDepthRedNodesNum; ++j) { + CurrDepthRedNodes.push_back(CurrDepthNonLeaves[i]); + ++i; + } + if (!CurrDepthRedNodes.empty()) + RedNonLeaves.push_back(CurrDepthRedNodes); + } + + // Rearranging the inner nodes such that nodes of depth i will necessarily + // appear before nodes of depth i+1. + BinaryOperator *Top = I; + for (unsigned Depth = 1; Depth < NonLeaves.size(); ++Depth) { + SmallVector &DepthNodes = NonLeaves[Depth]; + for (BinaryOperator *DepthNode : DepthNodes) { + DepthNode->moveBefore(Top); + Top = DepthNode; + } + } + + // Updating the two sub trees with the new leaves. + updateTree(BlueNonLeaves, BlueLeaves); + updateTree(RedNonLeaves, RedLeaves); + return true; +} + +char WeakReassociatePass::ID = 0; +INITIALIZE_PASS(WeakReassociatePass, "weak-reassociate", + "Weak Reassociate expressions", false, false) + +// Public interface to the Reassociate pass +FunctionPass *llvm::createWeakReassociatePass() { + return new WeakReassociatePass(); +} Index: test/Transforms/WeakReassociate/or_or_shift.ll =================================================================== --- /dev/null +++ test/Transforms/WeakReassociate/or_or_shift.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -weak-reassociate -instcombine -S | FileCheck %s + +; (((A & 2) >> 1) | ((A & 2) << 6)) | (((A & 4) >> 1) | ((A & 4) << 6)) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | (((A & 2) << 6) | ((A & 4) << 6)) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | (((A & 2) | (A & 4)) << 6) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | ((A & 6) << 6) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | ((A << 6) & 384) +; --> (((A & 2) | (A & 4)) >> 1) | ((A << 6) & 384) +; --> ((A & 6) >> 1) | ((A << 6) & 384) +; --> ((A >> 1) & 3) | ((A << 6) & 384) +; --> ((A & 6) >> 1) | ((A & 6) << 6) + +define i32 @func1(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func1( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 6 +; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[AND]], 6 +; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[AND]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], [[SHR]] +; CHECK-NEXT: ret i32 [[OR]] +; + %1 = and i32 %x, 2 + %2 = and i32 %x, 4 + %3 = shl nuw nsw i32 %1, 6 + %4 = lshr exact i32 %1, 1 + %5 = or i32 %3, %4 + %6 = shl nuw nsw i32 %2, 6 + %7 = lshr exact i32 %2, 1 + %8 = or i32 %6, %7 + %9 = or i32 %5, %8 + ret i32 %9 +} + +define i32 @func2(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func2( +; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 126 +; CHECK-NEXT: [[SHL1:%.*]] = shl nuw nsw i32 [[AND]], 8 +; CHECK-NEXT: [[SHL2:%.*]] = shl nuw nsw i32 [[AND]], 1 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL1]], [[SHL2]] +; CHECK-NEXT: ret i32 [[OR]] +; + %1 = and i32 %x, 6 + %2 = shl nuw nsw i32 %1, 8 + %3 = shl nuw nsw i32 %1, 1 + %4 = or i32 %2, %3 + %5 = and i32 %x, 24 + %6 = shl nuw nsw i32 %5, 8 + %7 = shl nuw nsw i32 %5, 1 + %8 = or i32 %6, %7 + %9 = or i32 %8, %4 + %10 = and i32 %x, 96 + %11 = shl nuw nsw i32 %10, 8 + %12 = shl nuw nsw i32 %10, 1 + %13 = or i32 %11, %12 + %14 = or i32 %13, %9 + ret i32 %14 +} Index: test/Transforms/WeakReassociate/or_or_xor_and.ll =================================================================== --- /dev/null +++ test/Transforms/WeakReassociate/or_or_xor_and.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -weak-reassociate -instcombine -S | FileCheck %s + +; (((x ^ C1) | (x & C2)) | ((y ^ C1) | (y & C2))) -> +; (((x | y) & C2) | ((x ^ C1) | (y ^ C1))) +define i32 @or_or_xor_and(i32 %x, i32 %y) local_unnamed_addr #0 { +; CHECK-LABEL: @or_or_xor_and( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %x, 15 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 %y, 15 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 %x, %y +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 38 +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %1 = xor i32 %x, 15 + %2 = and i32 %x, 38 + %3 = or i32 %1, %2 + %4 = xor i32 %y, 15 + %5 = and i32 %y, 38 + %6 = or i32 %4, %5 + %7 = or i32 %3, %6 + ret i32 %7 +}