diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -18,6 +18,7 @@
 #include <limits>
 
 namespace mlir {
+
 namespace func {
 class FuncOp;
 } // namespace func
@@ -110,10 +111,6 @@
 /// memory hierarchy.
 std::unique_ptr<OperationPass<func::FuncOp>> createPipelineDataTransferPass();
 
-/// Populate patterns that expand affine index operations into more fundamental
-/// operations (not necessarily restricted to Affine dialect).
-void populateAffineExpandIndexOpsPatterns(RewritePatternSet &patterns);
-
 /// Creates a pass to expand affine index operations into more fundamental
 /// operations (not necessarily restricted to Affine dialect).
 std::unique_ptr<Pass> createAffineExpandIndexOpsPass();
diff --git a/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Affine/Transforms/Transforms.h
@@ -0,0 +1,46 @@
+//===- Transforms.h - Transforms Entrypoints --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines a set of transforms specific for the AffineOps
+// dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
+#define MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
+
+#include "mlir/Support/LogicalResult.h"
+
+namespace mlir {
+class RewritePatternSet;
+class RewriterBase;
+class AffineApplyOp;
+
+/// Populate patterns that expand affine index operations into more fundamental
+/// operations (not necessarily restricted to Affine dialect).
+void populateAffineExpandIndexOpsPatterns(RewritePatternSet &patterns);
+
+/// Helper function to rewrite `op`'s affine map and reorder its operands such
+/// that they are in increasing order of hoistability (i.e. the least hoistable
+/// operands come first in the operand list).
+void reorderOperandsByHoistability(RewriterBase &rewriter, AffineApplyOp op);
+
+/// Split an "affine.apply" operation into 2 smaller ops, exhibiting
+/// opportunities for CSE and LICM. Return the sink AffineApplyOp on success or
+/// failure if the result of `op`'s map is not a nested add or mul expression.
+/// Note that this can be currently undone by canonicalization which tries to
+/// maximally compose chains of AffineApplyOps.
+FailureOr<AffineApplyOp> decompose(RewriterBase &rewriter, AffineApplyOp op);
+
+//===----------------------------------------------------------------------===//
+// Registration
+//===----------------------------------------------------------------------===//
+
+} // namespace mlir
+
+#endif // MLIR_DIALECT_AFFINE_TRANSFORMS_TRANSFORMS_H
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineExpandIndexOps.cpp
@@ -13,6 +13,7 @@
 #include "mlir/Dialect/Affine/Passes.h"
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
 #include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@
   AffineLoopNormalize.cpp
   AffineParallelize.cpp
   AffineScalarReplacement.cpp
+  DecomposeAffineOps.cpp
   LoopCoalescing.cpp
   LoopFusion.cpp
   LoopTiling.cpp
diff --git a/mlir/lib/Dialect/Affine/Transforms/DecomposeAffineOps.cpp b/mlir/lib/Dialect/Affine/Transforms/DecomposeAffineOps.cpp
new file mode 100644
--- /dev/null
+++ b/mlir/lib/Dialect/Affine/Transforms/DecomposeAffineOps.cpp
@@ -0,0 +1,156 @@
+//===- DecomposeAffineOps.cpp - Decompose affine ops into finer-grained ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements functionality to progressively decompose coarse-grained
+// affine ops into finer-grained ops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Transforms/Transforms.h"
+#include "mlir/Dialect/Affine/Utils.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/Value.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "llvm/Support/Debug.h"
+#include <cstdint>
+
+using namespace mlir;
+
+#define DEBUG_TYPE "decompose-affine-ops"
+#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
+#define DBGSNL() (llvm::dbgs() << "\n")
+
+/// Count the loops enclosing the owner of `operand`, starting from the
+/// innermost one, that have the operand's value defined outside of them.
+/// Counting stops at the first loop for which this does not hold.
+static int64_t numEnclosingInvariantLoops(OpOperand &operand) {
+  Value value = operand.get();
+  int64_t numLoops = 0;
+  for (auto loop = operand.getOwner()->getParentOfType<LoopLikeOpInterface>();
+       loop && loop.isDefinedOutsideOfLoop(value);
+       loop = loop->getParentOfType<LoopLikeOpInterface>())
+    ++numLoops;
+  return numLoops;
+}
+
+/// Reorder `op`'s operands by increasing number of enclosing loops they are
+/// invariant to (least hoistable first) and rewrite its map over symbols only.
+void mlir::reorderOperandsByHoistability(RewriterBase &rewriter,
+                                         AffineApplyOp op) {
+  SmallVector<int64_t> numInvariant = llvm::to_vector<4>(
+      llvm::map_range(op->getOpOperands(), [&](OpOperand &operand) {
+        return numEnclosingInvariantLoops(operand);
+      }));
+
+  int64_t numOperands = op.getNumOperands();
+  SmallVector<int64_t> operandPositions =
+      llvm::to_vector<4>(llvm::seq<int64_t>(0, numOperands));
+  // Stable sort keeps ties in original order, making the result deterministic.
+  llvm::stable_sort(operandPositions, [&numInvariant](int64_t i1, int64_t i2) {
+    return numInvariant[i1] < numInvariant[i2];
+  });
+
+  SmallVector<AffineExpr> replacements(numOperands);
+  SmallVector<Value> operands(numOperands);
+  for (int64_t i = 0; i < numOperands; ++i) {
+    operands[i] = op.getOperand(operandPositions[i]);
+    replacements[operandPositions[i]] = getAffineSymbolExpr(i, op.getContext());
+  }
+
+  AffineMap map = op.getAffineMap();
+  ArrayRef<AffineExpr> repls{replacements};
+  map = map.replaceDimsAndSymbols(repls.take_front(map.getNumDims()),
+                                  repls.drop_front(map.getNumDims()),
+                                  /*numResultDims=*/0,
+                                  /*numResultSyms=*/numOperands);
+  map = AffineMap::get(0, numOperands,
+                       simplifyAffineExpr(map.getResult(0), 0, numOperands),
+                       op->getContext());
+
+  rewriter.startRootUpdate(op);
+  op.setMap(map);
+  op->setOperands(operands);
+  rewriter.finalizeRootUpdate(op);
+}
+
+/// Create an affine.apply computing `expr`, a subexpression of `originalOp`'s
+/// affine map, over the same dims, symbols and operands as `originalOp`.
+/// The new map and operands are canonicalized (duplicates merged, unused
+/// operands dropped) but not maximally composed with other AffineApplyOps, as
+/// that would defeat the purpose of the decomposition.
+static AffineApplyOp createSubApply(RewriterBase &rewriter,
+                                    AffineApplyOp originalOp, AffineExpr expr) {
+  AffineMap srcMap = originalOp.getAffineMap();
+  AffineMap subMap =
+      AffineMap::get(srcMap.getNumDims(), srcMap.getNumSymbols(), expr,
+                     originalOp->getContext());
+  SmallVector<Value> operands = originalOp->getOperands();
+  canonicalizeMapAndOperands(&subMap, &operands);
+  return rewriter.create<AffineApplyOp>(originalOp.getLoc(), subMap, operands);
+}
+
+/// Split an "affine.apply" operation into a chain of smaller ops by
+/// reassociating its top-level add/mul, exhibiting opportunities for CSE and
+/// LICM. Return the op replacing `op`, or failure if there is nothing to split.
+/// Note: canonicalization may currently undo this by recomposing the chain.
+FailureOr<AffineApplyOp> mlir::decompose(RewriterBase &rewriter,
+                                         AffineApplyOp op) {
+  AffineExpr exp = op.getAffineMap().getResult(0);
+  auto binExpr = exp.dyn_cast<AffineBinaryOpExpr>();
+  if (!binExpr)
+    return rewriter.notifyMatchFailure(op, "terminal affine.apply");
+
+  if (!binExpr.getLHS().isa<AffineBinaryOpExpr>() &&
+      !binExpr.getRHS().isa<AffineBinaryOpExpr>())
+    return rewriter.notifyMatchFailure(op, "terminal affine.apply");
+
+  bool supportedKind = ((binExpr.getKind() == AffineExprKind::Add) ||
+                        (binExpr.getKind() == AffineExprKind::Mul));
+  if (!supportedKind)
+    return rewriter.notifyMatchFailure(
+        op, "only add or mul binary expr can be reassociated");
+
+  LLVM_DEBUG(DBGS() << "Start decomposeIntoFinerGrainedOps: " << op << "\n");
+
+  // Peel off the RHS while the binary operation kind does not change. The
+  // remaining LHS (a leaf or an expression of another kind) is the last term:
+  // it must get its own affine.apply too, otherwise it is silently dropped.
+  MLIRContext *ctx = op->getContext();
+  SmallVector<AffineApplyOp> rhsOps;
+  while (true) {
+    auto currentBinExpr = exp.dyn_cast<AffineBinaryOpExpr>();
+    if (!currentBinExpr || currentBinExpr.getKind() != binExpr.getKind()) {
+      rhsOps.push_back(createSubApply(rewriter, op, exp));
+      LLVM_DEBUG(DBGS() << "--subapply: " << rhsOps.back() << "\n");
+      break;
+    }
+    rhsOps.push_back(createSubApply(rewriter, op, currentBinExpr.getRHS()));
+    LLVM_DEBUG(DBGS() << "--subapply: " << rhsOps.back() << "\n");
+    exp = currentBinExpr.getLHS();
+  }
+
+  // Merge back iteratively, thus achieving reassociation.
+  auto s0 = getAffineSymbolExpr(0, ctx);
+  auto s1 = getAffineSymbolExpr(1, ctx);
+  AffineMap binMap = AffineMap::get(
+      /*dimCount=*/0, /*symbolCount=*/2,
+      getAffineBinaryOpExpr(binExpr.getKind(), s0, s1), ctx);
+  AffineApplyOp rhs = rhsOps[0];
+  for (int64_t i = 0, e = rhsOps.size(); i + 1 < e; ++i) {
+    rhs = rewriter.create<AffineApplyOp>(op.getLoc(), binMap,
+                                         ValueRange{rhs, rhsOps[i + 1]});
+    LLVM_DEBUG(DBGS() << "--reassociate into: " << rhs << "\n");
+  }
+
+  rewriter.replaceOp(op, rhs.getResult());
+  return rhs;
+}