diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -26,6 +26,7 @@
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/Matchers.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
@@ -49,7 +50,6 @@
 
 /// Loop invariant code motion (LICM) pass.
 /// TODO: The pass is missing zero-trip tests.
-/// TODO: Check for the presence of side effects before hoisting.
 /// TODO: This code should be removed once the new LICM pass can handle its
 /// uses.
 struct LoopInvariantCodeMotion
@@ -92,13 +92,9 @@
     if (!areAllOpsInTheBlockListInvariant(parOp.getLoopBody(), indVar, iterArgs,
                                           opsWithUsers, opsToHoist))
       return false;
-  } else if (isa<AffineDmaStartOp, AffineDmaWaitOp>(op)) {
-    // TODO: Support DMA ops.
-    // FIXME: This should be fixed to not special-case these affine DMA ops but
-    // instead rely on side effects.
-    return false;
-  } else if (op.getNumRegions() > 0) {
-    // We can't handle region-holding ops we don't know about.
+  } else if (!isMemoryEffectFree(&op) &&
+             !isa<AffineReadOpInterface, AffineWriteOpInterface,
+                  AffinePrefetchOp>(&op)) {
     return false;
   } else if (!matchPattern(&op, m_Constant())) {
     // Register op in the set of ops that have users.
diff --git a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
--- a/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
+++ b/mlir/test/Dialect/Affine/affine-loop-invariant-code-motion.mlir
@@ -867,4 +867,49 @@
 // CHECK-NEXT: }
 // CHECK-NEXT: }
   return
-}
\ No newline at end of file
+}
+
+// Side-effecting ops shouldn't be hoisted.
+
+// CHECK-LABEL: func @side_effecting_ops
+func.func @side_effecting_ops() {
+  %cst = arith.constant 0.0 : f32
+  %m0 = memref.alloc() : memref<1x512x16x16xf32>
+  %0 = gpu.wait async
+  affine.for %arg783 = 0 to 14 {
+    affine.for %arg784 = 0 to 14 {
+      affine.parallel (%arg785) = (0) to (512) {
+        affine.for %arg786 = 0 to 1 {
+          affine.for %arg787 = 0 to 1 {
+            affine.for %arg788 = 0 to 1 {
+              %m1 = memref.alloc() : memref<1xf32, 3>
+              %m2 = memref.alloc() : memref<1xf32, 3>
+              affine.store %cst, %m1[0] : memref<1xf32, 3>
+              affine.store %cst, %m2[0] : memref<1xf32, 3>
+              %memref_2897, %asyncToken_2898 = gpu.alloc async [%0] () : memref<1x512x16x16xf32>
+              %2432 = gpu.memcpy async [%0] %memref_2897, %m0 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
+              affine.for %arg789 = 0 to 16 {
+                affine.for %arg790 = 0 to 16 {
+                  affine.store %cst, %memref_2897[0, %arg785 + %arg788, %arg789, %arg790] : memref<1x512x16x16xf32>
+                }
+              }
+              memref.dealloc %m2 : memref<1xf32, 3>
+              memref.dealloc %m1 : memref<1xf32, 3>
+              %2433 = gpu.memcpy async [%0] %m0, %memref_2897 : memref<1x512x16x16xf32>, memref<1x512x16x16xf32>
+              %2434 = gpu.dealloc async [%asyncToken_2898] %memref_2897 : memref<1x512x16x16xf32>
+            }
+          }
+        }
+      }
+    }
+  }
+  // CHECK: affine.for %{{.*}} = 0 to 1
+  // CHECK-NEXT: affine.for %{{.*}} = 0 to 1
+  // CHECK: memref.alloc
+  // CHECK: memref.alloc
+  // CHECK: gpu.memcpy
+  // CHECK: affine.for %{{.*}} = 0 to 16
+  // CHECK: affine.for %{{.*}} = 0 to 16
+  // CHECK: memref.dealloc
+  return
+}
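
Note for reviewers: the hoisting gate now delegates to isMemoryEffectFree from
mlir/Interfaces/SideEffectInterfaces.h instead of special-casing DMA and
region-holding ops. A minimal sketch of what that helper checks, under the
assumption that the reconstruction below captures its semantics (the function
name with the "Sketch" suffix is mine, not upstream code):

    #include "mlir/IR/Operation.h"
    #include "mlir/Interfaces/SideEffectInterfaces.h"
    using namespace mlir;

    // Sketch: an op is memory-effect free iff it reports no effects (either
    // directly via MemoryEffectOpInterface or recursively via the
    // HasRecursiveMemoryEffects trait) and every op nested in its regions is
    // itself memory-effect free.
    static bool isMemoryEffectFreeSketch(Operation *op) {
      if (auto memInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
        // The op must itself report no memory effects.
        if (!memInterface.hasNoEffect())
          return false;
        // Without the recursive-effects trait, the interface speaks for the
        // whole op, nested regions included.
        if (!op->hasTrait<OpTrait::HasRecursiveMemoryEffects>())
          return true;
      } else if (!op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
        // No effect interface and no recursive-effects trait: conservatively
        // assume the op has side effects.
        return false;
      }
      // The op's effects are those of its body (e.g. affine.for,
      // affine.parallel): recurse into all nested ops.
      for (Region &region : op->getRegions())
        for (Operation &nested : region.getOps())
          if (!isMemoryEffectFreeSketch(&nested))
            return false;
      return true;
    }

Under this check, the gpu.memcpy, gpu.alloc, gpu.dealloc, and memref.dealloc
ops in the new test report effects and stay put, while affine read/write ops
are exempted from the gate so the pass's own dependence analysis can still
decide whether they are hoistable.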