Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -394,6 +394,7 @@ void initializeVerifierLegacyPassPass(PassRegistry&); void initializeVirtRegMapPass(PassRegistry&); void initializeVirtRegRewriterPass(PassRegistry&); +void initializeWeakReassociateLegacyPassPass(PassRegistry&); void initializeWasmEHPreparePass(PassRegistry&); void initializeWholeProgramDevirtPass(PassRegistry&); void initializeWinEHPreparePass(PassRegistry&); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -238,6 +238,19 @@ // FunctionPass *createReassociatePass(); +//===----------------------------------------------------------------------===// +// +// WeakReassociate - This pass reassociates commutative expressions in an order +// that is designed to promote better instruction combining. +// +// For example: (x << 6 | y >> 1) | (z << 6 | w >> 1) -> +// (x << 6 | z << 6) | (y >> 1 | w >> 1) +// +// This pass will not change the topography of the tree. That means, amongst the +// rest, that this pass never creates new instructions. +// +FunctionPass *createWeakReassociatePass(); + //===----------------------------------------------------------------------===// // // JumpThreading - Thread control through mult-pred/multi-succ blocks where some Index: include/llvm/Transforms/Scalar/WeakReassociate.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/WeakReassociate.h @@ -0,0 +1,50 @@ +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Pass.h" + +#define DEBUG_TYPE "weak-reassociate" + +namespace llvm { +/// Reassociate commutative expressions. +class WeakReassociatePass + : public PassInfoMixin { + /// Tries to optimizae an instruction by re-associating its operands. + /// This function will succeed only if: + /// 1. The instruction's type is not i1. + /// 2. The instruction is associative. + /// 3. The instruction is the root of its opcode tree. + /// If all of the above hold, a call to WeakReassociatePass::RewriteExprTree + /// will be made. + /// + /// \param I The instruction to optimize + /// + /// \returns true iff successfuly optimized \p I + bool OptimizeInst(Instruction *I); + /// Rewrite an expression tree for an associative instructions tree root + /// instruction. + /// This function tried to partition the leaves of this tree into two sets, a + /// blue set and a red set, according to their opcodes and used constants. If + /// it succeeds it will re-arrange the tree such that all of the blue operands + /// will be in one side of the tree, and all of the red operands will be in + /// the other side of the tree. + /// + /// \param I An associative instructions tree root instruction. + /// + /// \returns true iff rewriting of the expression tree succeeded. + bool RewriteExprTree(BinaryOperator *I); + /// Given an associative instructions tree root instruction, this function + /// will return all the nodes of the tree. It may fail if the depth of the + /// tree is too big or there are too many leaves. + /// + /// \param I An associative instructions tree root instruction. + /// \param[out] NonLeaves All of the inner nodes of the tree. + /// \param[out] Leaves All of the leaves of the tree. + /// + /// \return true iff successfully got all of the nodes of the tree. + bool getTreeNodes(BinaryOperator *I, + SmallVectorImpl> &NonLeaves, + SmallVectorImpl &Leaves); + +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &); +}; +} // namespace llvm Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -142,6 +142,7 @@ #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" +#include "llvm/Transforms/Scalar/WeakReassociate.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/BreakCriticalEdges.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" @@ -484,6 +485,7 @@ FPM.addPass(ADCEPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(InstCombinePass()); + FPM.addPass(WeakReassociatePass()); invokePeepholeEPCallbacks(FPM, Level); return FPM; Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -410,6 +410,7 @@ MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. addInstructionCombiningPass(MPM); + MPM.add(createWeakReassociatePass()); // Reassociate expressions addExtensionsToPM(EP_Peephole, MPM); } Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -68,6 +68,7 @@ StraightLineStrengthReduce.cpp StructurizeCFG.cpp TailRecursionElimination.cpp + WeakReassociate.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -104,6 +104,7 @@ initializeLoopVersioningPassPass(Registry); initializeEntryExitInstrumenterPass(Registry); initializePostInlineEntryExitInstrumenterPass(Registry); + initializeWeakReassociateLegacyPassPass(Registry); } void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) { Index: lib/Transforms/Scalar/WeakReassociate.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/WeakReassociate.cpp @@ -0,0 +1,357 @@ +//===-------- WeakReassociate.cpp - Reassociate binary expressions --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass reassociates commutative expressions in an order that is designed +// to promote better instruction combining. +// +// For example: (x << 6 | y >> 1) | (z << 6 | w >> 1) -> +// (x << 6 | z << 6) | (y >> 1 | w >> 1) +// +// In the implementation of this algorithm, all leafs are divided into leaf +// classes according to their opcodes and constants (if applicable), and iff +// there are exactly two leaf classes ("Blue" and "Red"), the tree will be +// reassociated such that one side will contain only Blue leaves and the other +// will contain only Red leaves. +// This pass will not change the topography of the tree. That means, amongst the +// rest, that this pass never creates new instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/WeakReassociate.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" +#include +#include +using namespace llvm; + +#define DEBUG_TYPE "weak-reassociate" + +/// An associative instructions tree leaf class. A class of a leaf will +/// represent its new position in the tree after the optimization. +struct LeafClass { + Instruction::BinaryOps Opcode; + ConstantInt *Op1; +}; + +inline bool operator<(const LeafClass &LHS, const LeafClass &RHS) { + if (LHS.Opcode < RHS.Opcode) + return true; + else if (LHS.Opcode > RHS.Opcode) + return false; + return LHS.Op1->getValue().slt(RHS.Op1->getValue()); +} + +/// Given a leaf, this function tries to return its appropriate LeafClass. +/// +/// \param V An associative instructions tree leaf. +/// \param[out] OpClass \p V 's LeafClass. +/// +/// \return true iff successfuly found \p V 's LeafClass. +static bool getLeafClass(Value *V, LeafClass &OpClass) { + if (!V) + return false; + BinaryOperator *BO = dyn_cast(V); + if (!BO) + return false; + ConstantInt *Op1 = dyn_cast(BO->getOperand(1)); + if (!Op1) + return false; + OpClass.Opcode = BO->getOpcode(); + OpClass.Op1 = Op1; + return true; +} + +/// Computes and returns the number of non-leaves (inner nodes) required in an +/// associative instructions tree in order to support a given number of leaves. +/// +/// \param LeafNo The required number of leaves. +/// \param[out] DepthToNodesNum The number of required non-leaves (inner nodes) +/// by depth. i.e. DepthToNodesNum[i] will hold the number of required +/// non-leaves (inner nodes) at depth i. +static void +nonLeafNodesNeededForTree(unsigned LeafNo, + SmallVectorImpl &DepthToNodesNum) { + unsigned TotalRemainingNodes = LeafNo - 1; + for (unsigned i = 0; TotalRemainingNodes > 0; ++i) { + unsigned MaxNodesForDepth = 1U << i; + if (TotalRemainingNodes <= MaxNodesForDepth) + DepthToNodesNum.push_back(TotalRemainingNodes); + else + DepthToNodesNum.push_back(MaxNodesForDepth); + TotalRemainingNodes -= DepthToNodesNum[i]; + } +} + +/// Updates an associative instructions tree with new operands as leaves. +/// +/// \param TreeNonLeaves The non-leaves (inner nodes) of the associative +/// instructions tree. TreeNonLeaves[i] will hold the non-leaves (inner nodes) +/// at depth i. +/// \param Leaves The new leaves for the tree. +static void updateTree( + const SmallVectorImpl> &TreeNonLeaves, + const SmallVectorImpl &Leaves) { + unsigned i = 0; // iterator over the leaves + for (unsigned Depth = 0; Depth < TreeNonLeaves.size(); ++Depth) { + const SmallVector &CurrDepthNodes = + TreeNonLeaves[Depth]; + const SmallVector *NextDepthNodes = + Depth + 1 < TreeNonLeaves.size() ? &TreeNonLeaves[Depth + 1] : nullptr; + unsigned j = 0; // iterator over NextDepthNodes + for (BinaryOperator *CurrDepthNode : CurrDepthNodes) { + for (unsigned k = 0; k < 2; k++) { + Value *NewOperand = nullptr; + if (NextDepthNodes != nullptr && j < NextDepthNodes->size()) { + // First set all the nodes from the next depth, leave the leaves for + // the end + NewOperand = (*NextDepthNodes)[j]; + ++j; + } else { + assert(i < Leaves.size()); + NewOperand = Leaves[i]; + ++i; + } + CurrDepthNode->setOperand(k, NewOperand); + } + } + } + assert(i == Leaves.size() && "Not all the leaves were inserted to the tree"); +} + +PreservedAnalyses WeakReassociatePass::run(Function &F, + FunctionAnalysisManager &) { + bool MadeChange = false; + for (Instruction &I : instructions(F)) { + if (!isInstructionTriviallyDead(&I)) + MadeChange |= OptimizeInst(&I); + } + + if (MadeChange) { + PreservedAnalyses PA; + PA.preserveSet(); + PA.preserve(); + return PA; + } + + return PreservedAnalyses::all(); +} + +bool WeakReassociatePass::OptimizeInst(Instruction *I) { + if (I->getType()->isIntegerTy(1)) + return false; + if (!I->isAssociative()) + return false; + BinaryOperator *BO = cast(I); + + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. + unsigned Opcode = BO->getOpcode(); + if (BO->hasOneUse() && BO->user_back()->getOpcode() == Opcode) + return false; + + SmallVector Ops; + std::stack> S; + S.push(BO); + while (!S.empty()) { + BinaryOperator *CurrBO = S.top(); + S.pop(); + for (unsigned i = 0; i < 2; ++i) { + Value *Operand = CurrBO->getOperand(i); + BinaryOperator *OperandBO = dyn_cast(Operand); + if (!OperandBO || !OperandBO->hasOneUse() || + OperandBO->getOpcode() != Opcode) { + // This is a leaf + Ops.push_back(Operand); + if (Ops.size() >= 10) + return false; + } else + // not a leaf, needs to examine it too. + S.push(OperandBO); + } + } + if (Ops.size() <= 2) + return false; + return RewriteExprTree(BO); +} + +bool WeakReassociatePass::getTreeNodes( + BinaryOperator *I, + SmallVectorImpl> &NonLeaves, + SmallVectorImpl &Leaves) { + unsigned Opcode = I->getOpcode(); + BasicBlock *BB = I->getParent(); + NonLeaves.push_back({I}); + for (unsigned Depth = 1;; ++Depth) { + if (Depth >= 4) + return false; + const SmallVector &PrevDepthNodes = + NonLeaves[Depth - 1]; + SmallVector *CurrDepthNodes = nullptr; + // The operands of PrevDepthNodes are either CurrDepthNodes or leaves. + for (BinaryOperator *PrevDepthNode : PrevDepthNodes) { + for (Value *Operand : PrevDepthNode->operands()) { + BinaryOperator *OperandBO = dyn_cast(Operand); + if (!OperandBO || !OperandBO->hasOneUse() || + OperandBO->getOpcode() != Opcode) { + // If the operand is not a BinaryOperator with the same opcode of the + // root and one use then it is a leaf. + Leaves.push_back(Operand); + if (Leaves.size() >= 10) + return false; + } else { + if (OperandBO->getParent() != BB) + // All non-leaves (inner nodes) must belong to the same basic block. + return false; + if (CurrDepthNodes == nullptr) { + NonLeaves.push_back({}); + CurrDepthNodes = &NonLeaves[Depth]; + } + CurrDepthNodes->push_back(OperandBO); + } + } + } + if (CurrDepthNodes == nullptr) + // No new inner nodes, this was the last level of the tree. + break; + } + return true; +} + +bool WeakReassociatePass::RewriteExprTree(BinaryOperator *I) { + SmallVector, 8> NonLeaves; + SmallVector Leaves; + if (!getTreeNodes(I, NonLeaves, Leaves)) + return false; + + std::map> LeavesSets; + // Dividing the leaves into sets according to their LeafClass. + for (Value *Leaf : Leaves) { + LeafClass OpClass; + if (!getLeafClass(Leaf, OpClass)) + return false; + auto LeavesSetLocation = LeavesSets.find(OpClass); + if (LeavesSetLocation == LeavesSets.end()) { + if (LeavesSets.size() == 2) + return false; + std::tie(LeavesSetLocation, std::ignore) = + LeavesSets.insert(std::make_pair(OpClass, SmallVector())); + } + LeavesSetLocation->second.push_back(Leaf); + } + // Must have exactly two sets, each with at least two elements. + if (std::find_if( + LeavesSets.begin(), LeavesSets.end(), + [](const std::pair> &KeyValPair) + -> bool { return KeyValPair.second.size() == 1; }) != + LeavesSets.end()) + return false; + if (LeavesSets.size() != 2) + return false; + const SmallVector &BlueLeaves = LeavesSets.begin()->second; + const SmallVector &RedLeaves = (++LeavesSets.begin())->second; + + SmallVector BlueDepthToNodesNum, RedDepthToNodesNum; + nonLeafNodesNeededForTree(BlueLeaves.size(), BlueDepthToNodesNum); + assert(BlueDepthToNodesNum.size() <= NonLeaves.size()); + nonLeafNodesNeededForTree(RedLeaves.size(), RedDepthToNodesNum); + assert(RedDepthToNodesNum.size() <= NonLeaves.size()); + + // NonLeaves[i] will hold the none leaves (inner nodes) at depth i of + // the tree. + SmallVector, 8> BlueNonLeaves, RedNonLeaves; + for (unsigned Depth = 1; Depth < NonLeaves.size(); ++Depth) { + // The two trees, being the immediate children of the tree, have + // depth of one less (e.g. their root is at depth 1 of the big tree). + unsigned ChildDepth = Depth - 1; + unsigned CurrDepthBlueNodesNum = ChildDepth < BlueDepthToNodesNum.size() + ? BlueDepthToNodesNum[ChildDepth] + : 0; + unsigned CurrDepthRedNodesNum = ChildDepth < RedDepthToNodesNum.size() + ? RedDepthToNodesNum[ChildDepth] + : 0; + SmallVector &CurrDepthNonLeaves = NonLeaves[Depth]; + if (CurrDepthBlueNodesNum + CurrDepthRedNodesNum > + CurrDepthNonLeaves.size()) + // If not enough nodes, stop with a failure. Note that this pass is not + // allowed to create new instructions. + return false; + + SmallVector CurrDepthBlueNodes, CurrDepthRedNodes; + // Splitting CurrDepthNonLeaves between CurrDepthBlueNodes and + // CurrDepthRedNodes. + unsigned i = 0; + for (unsigned j = 0; j < CurrDepthBlueNodesNum; ++j) { + CurrDepthBlueNodes.push_back(CurrDepthNonLeaves[i]); + ++i; + } + if (!CurrDepthBlueNodes.empty()) + BlueNonLeaves.push_back(CurrDepthBlueNodes); + for (unsigned j = 0; j < CurrDepthRedNodesNum; ++j) { + CurrDepthRedNodes.push_back(CurrDepthNonLeaves[i]); + ++i; + } + if (!CurrDepthRedNodes.empty()) + RedNonLeaves.push_back(CurrDepthRedNodes); + } + + // Rearranging the inner nodes such that nodes of depth i will necessarily + // appear before nodes of depth i+1. + BinaryOperator *Top = I; + for (unsigned Depth = 1; Depth < NonLeaves.size(); ++Depth) { + SmallVector &DepthNodes = NonLeaves[Depth]; + for (BinaryOperator *DepthNode : DepthNodes) { + DepthNode->moveBefore(Top); + Top = DepthNode; + } + } + + // Updating the two sub trees with the new leaves. + updateTree(BlueNonLeaves, BlueLeaves); + updateTree(RedNonLeaves, RedLeaves); + return true; +} + +namespace { + +class WeakReassociateLegacyPass : public FunctionPass { + WeakReassociatePass Impl; + +public: + static char ID; // Pass identification, replacement for typeid + + WeakReassociateLegacyPass() : FunctionPass(ID) { + initializeWeakReassociateLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + + FunctionAnalysisManager DummyFAM; + auto PA = Impl.run(F, DummyFAM); + return !PA.areAllPreserved(); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved(); + } +}; + +} // end anonymous namespace + +char WeakReassociateLegacyPass::ID = 0; +INITIALIZE_PASS(WeakReassociateLegacyPass, "weak-reassociate", + "Weak Reassociate expressions", false, false) + +// Public interface to the Reassociate pass +FunctionPass *llvm::createWeakReassociatePass() { + return new WeakReassociateLegacyPass(); +} Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -204,6 +204,7 @@ ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O-NEXT: Running pass: WeakReassociatePass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -184,6 +184,7 @@ ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O-NEXT: Running pass: WeakReassociatePass ; CHECK-O-NEXT: Finished llvm::Function pass manager run. ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. Index: test/Other/opt-O2-pipeline.ll =================================================================== --- test/Other/opt-O2-pipeline.ll +++ test/Other/opt-O2-pipeline.ll @@ -173,6 +173,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Weak Reassociate expressions ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction Index: test/Other/opt-O3-pipeline.ll =================================================================== --- test/Other/opt-O3-pipeline.ll +++ test/Other/opt-O3-pipeline.ll @@ -177,6 +177,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Weak Reassociate expressions ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction Index: test/Other/opt-Os-pipeline.ll =================================================================== --- test/Other/opt-Os-pipeline.ll +++ test/Other/opt-Os-pipeline.ll @@ -160,6 +160,7 @@ ; CHECK-NEXT: Lazy Block Frequency Analysis ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions +; CHECK-NEXT: Weak Reassociate expressions ; CHECK-NEXT: A No-Op Barrier Pass ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction Index: test/Transforms/WeakReassociate/add_bitwise.ll =================================================================== --- /dev/null +++ test/Transforms/WeakReassociate/add_bitwise.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -weak-reassociate -instcombine -S | FileCheck %s + +define i32 @func1(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func1( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[TMP5]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP4]] +; CHECK-NEXT: ret i32 [[TMP7]] +; + %1 = add i32 %x, 2 + %2 = add i32 %x, 4 + %3 = shl nuw nsw i32 %1, 6 + %4 = lshr exact i32 %1, 1 + %5 = or i32 %3, %4 + %6 = shl nuw nsw i32 %2, 6 + %7 = lshr exact i32 %2, 1 + %8 = or i32 %6, %7 + %9 = or i32 %5, %8 + ret i32 %9 +} + +define i32 @func2(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func2( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 12 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP1]], [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], 19 +; CHECK-NEXT: ret i32 [[TMP5]] +; + + %1 = and i32 %x, 12 + %2 = and i32 %x, 1 + %3 = add i32 %1, 3 + %4 = add i32 %1, 17 + %5 = or i32 %3, %4 + %6 = add i32 %2, 3 + %7 = add i32 %2, 17 + %8 = or i32 %6, %7 + %9 = or i32 %5, %8 + ret i32 %9 +} + +define i32 @func3(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func3( +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], 6 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[X]], 24 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[X]], 96 +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP3]] +; CHECK-NEXT: [[TMP7:%.*]] = shl i32 [[TMP6]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP5]], [[TMP3]] +; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP7]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = add i32 %x, 6 + %2 = shl nuw nsw i32 %1, 8 + %3 = shl nuw nsw i32 %1, 1 + %4 = or i32 %2, %3 + %5 = add i32 %x, 24 + %6 = shl nuw nsw i32 %5, 8 + %7 = shl nuw nsw i32 %5, 1 + %8 = or i32 %6, %7 + %9 = or i32 %8, %4 + %10 = add i32 %x, 96 + %11 = shl nuw nsw i32 %10, 8 + %12 = shl nuw nsw i32 %10, 1 + %13 = or i32 %11, %12 + %14 = or i32 %13, %9 + ret i32 %14 +} Index: test/Transforms/WeakReassociate/or_or_shift.ll =================================================================== --- /dev/null +++ test/Transforms/WeakReassociate/or_or_shift.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -weak-reassociate -instcombine -S | FileCheck %s + +; (((A & 2) >> 1) | ((A & 2) << 6)) | (((A & 4) >> 1) | ((A & 4) << 6)) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | (((A & 2) << 6) | ((A & 4) << 6)) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | (((A & 2) | (A & 4)) << 6) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | ((A & 6) << 6) +; --> (((A & 2) >> 1) | ((A & 4) >> 1)) | ((A << 6) & 384) +; --> (((A & 2) | (A & 4)) >> 1) | ((A << 6) & 384) +; --> ((A & 6) >> 1) | ((A << 6) & 384) +; --> ((A >> 1) & 3) | ((A << 6) & 384) +; --> ((A & 6) >> 1) | ((A & 6) << 6) + +define i32 @func1(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func1( +; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[X:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[X]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 384 +; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP2]] +; CHECK-NEXT: ret i32 [[TMP5]] +; + %1 = and i32 %x, 2 + %2 = and i32 %x, 4 + %3 = shl nuw nsw i32 %1, 6 + %4 = lshr exact i32 %1, 1 + %5 = or i32 %3, %4 + %6 = shl nuw nsw i32 %2, 6 + %7 = lshr exact i32 %2, 1 + %8 = or i32 %6, %7 + %9 = or i32 %5, %8 + ret i32 %9 +} + +define i32 @func2(i32 %x) local_unnamed_addr #0 { +; CHECK-LABEL: @func2( +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 96 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[TMP1]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = shl i32 [[X]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 7680 +; CHECK-NEXT: [[TMP6:%.*]] = shl i32 [[X]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 60 +; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP5]], [[TMP2]] +; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP7]], [[TMP3]] +; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %1 = and i32 %x, 6 + %2 = shl nuw nsw i32 %1, 8 + %3 = shl nuw nsw i32 %1, 1 + %4 = or i32 %2, %3 + %5 = and i32 %x, 24 + %6 = shl nuw nsw i32 %5, 8 + %7 = shl nuw nsw i32 %5, 1 + %8 = or i32 %6, %7 + %9 = or i32 %8, %4 + %10 = and i32 %x, 96 + %11 = shl nuw nsw i32 %10, 8 + %12 = shl nuw nsw i32 %10, 1 + %13 = or i32 %11, %12 + %14 = or i32 %13, %9 + ret i32 %14 +} Index: test/Transforms/WeakReassociate/or_or_xor_and.ll =================================================================== --- /dev/null +++ test/Transforms/WeakReassociate/or_or_xor_and.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -weak-reassociate -instcombine -S | FileCheck %s + +; (((x ^ C1) | (x & C2)) | ((y ^ C1) | (y & C2))) -> +; (((x | y) & C2) | ((x ^ C1) | (y ^ C1))) +define i32 @or_or_xor_and(i32 %x, i32 %y) local_unnamed_addr #0 { +; CHECK-LABEL: @or_or_xor_and( +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %x, 15 +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 %y, 15 +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 %x, %y +; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 38 +; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP3]] +; CHECK-NEXT: ret i32 [[TMP6]] +; + %1 = xor i32 %x, 15 + %2 = and i32 %x, 38 + %3 = or i32 %1, %2 + %4 = xor i32 %y, 15 + %5 = and i32 %y, 38 + %6 = or i32 %4, %5 + %7 = or i32 %3, %6 + ret i32 %7 +}