diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -659,15 +659,17 @@
 /// that would change the read within `memOp`.
 template <typename EffectType, typename T>
 static bool hasNoInterveningEffect(Operation *start, T memOp) {
-  Value memref = memOp.getMemRef();
-  bool isOriginalAllocation = memref.getDefiningOp<memref::AllocaOp>() ||
-                              memref.getDefiningOp<memref::AllocOp>();
+  auto isLocallyAllocated = [](Value memref) {
+    auto *defOp = memref.getDefiningOp();
+    return defOp && hasSingleEffect<MemoryEffects::Allocate>(defOp, memref);
+  };
 
   // A boolean representing whether an intervening operation could have impacted
   // memOp.
   bool hasSideEffect = false;
 
   // Check whether the effect on memOp can be caused by a given operation op.
+  Value memref = memOp.getMemRef();
   std::function<void(Operation *)> checkOperation = [&](Operation *op) {
     // If the effect has alreay been found, early exit,
     if (hasSideEffect)
@@ -682,12 +684,12 @@
       // If op causes EffectType on a potentially aliasing location for
       // memOp, mark as having the effect.
       if (isa<EffectType>(effect.getEffect())) {
-        if (isOriginalAllocation && effect.getValue() &&
-            (effect.getValue().getDefiningOp<memref::AllocaOp>() ||
-             effect.getValue().getDefiningOp<memref::AllocOp>())) {
-          if (effect.getValue() != memref)
-            continue;
-        }
+        // TODO: This should be replaced with a check for no aliasing.
+        // Aliasing information should be passed to this method.
+        if (effect.getValue() && effect.getValue() != memref &&
+            isLocallyAllocated(memref) &&
+            isLocallyAllocated(effect.getValue()))
+          continue;
         opMayHaveEffect = true;
         break;
       }
@@ -1058,18 +1060,20 @@
   for (auto *op : opsToErase)
     op->erase();
 
-  // Check if the store fwd'ed memrefs are now left with only stores and can
-  // thus be completely deleted. Note: the canonicalize pass should be able
-  // to do this as well, but we'll do it here since we collected these anyway.
+  // Check if the store fwd'ed memrefs are now left with only stores and
+  // deallocs and can thus be completely deleted. Note: the canonicalize pass
+  // should be able to do this as well, but we'll do it here since we collected
+  // these anyway.
   for (auto memref : memrefsToErase) {
-    // If the memref hasn't been alloc'ed in this function, skip.
+    // If the memref hasn't been locally alloc'ed, skip.
     Operation *defOp = memref.getDefiningOp();
-    if (!defOp || !isa<memref::AllocOp>(defOp))
+    if (!defOp || !hasSingleEffect<MemoryEffects::Allocate>(defOp, memref))
       // TODO: if the memref was returned by a 'call' operation, we
       // could still erase it if the call had no side-effects.
       continue;
     if (llvm::any_of(memref.getUsers(), [&](Operation *ownerOp) {
-          return !isa<AffineWriteOpInterface, memref::DeallocOp>(ownerOp);
+          return !isa<AffineWriteOpInterface>(ownerOp) &&
+                 !hasSingleEffect<MemoryEffects::Free>(ownerOp, memref);
         }))
       continue;
 
@@ -1308,7 +1312,8 @@
 
     // Skip dealloc's - no replacement is necessary, and a memref replacement
     // at other uses doesn't hurt these dealloc's.
-    if (isa<memref::DeallocOp>(op) && !replaceInDeallocOp)
+    if (hasSingleEffect<MemoryEffects::Free>(op, oldMemRef) &&
+        !replaceInDeallocOp)
       continue;
 
     // Check if the memref was used in a non-dereferencing context. It is fine
@@ -1732,8 +1737,8 @@
   }
   // Replace any uses of the original alloc op and erase it. All remaining uses
   // have to be dealloc's; RAMUW above would've failed otherwise.
-  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
-    return isa<memref::DeallocOp>(op);
+  assert(llvm::all_of(oldMemRef.getUsers(), [&](Operation *op) {
+    return hasSingleEffect<MemoryEffects::Free>(op, oldMemRef);
   }));
   oldMemRef.replaceAllUsesWith(newAlloc);
   allocOp->erase();
diff --git a/mlir/test/Dialect/Affine/scalrep.mlir b/mlir/test/Dialect/Affine/scalrep.mlir
--- a/mlir/test/Dialect/Affine/scalrep.mlir
+++ b/mlir/test/Dialect/Affine/scalrep.mlir
@@ -15,21 +15,21 @@
     %v0 = affine.load %m[%i0] : memref<10xf32>
     %v1 = arith.addf %v0, %v0 : f32
   }
+  memref.dealloc %m : memref<10xf32>
   return
-// CHECK: %{{.*}} = arith.constant 7.000000e+00 : f32
+// CHECK: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
 // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
-// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
+// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
 // CHECK-NEXT: }
 // CHECK-NEXT: return
 }
 
 // CHECK-LABEL: func @multi_store_load() {
 func.func @multi_store_load() {
-  %c0 = arith.constant 0 : index
   %cf7 = arith.constant 7.0 : f32
   %cf8 = arith.constant 8.0 : f32
   %cf9 = arith.constant 9.0 : f32
-  %m = memref.alloc() : memref<10xf32>
+  %m = gpu.alloc() : memref<10xf32>
   affine.for %i0 = 0 to 10 {
     affine.store %cf7, %m[%i0] : memref<10xf32>
     %v0 = affine.load %m[%i0] : memref<10xf32>
@@ -40,17 +40,16 @@
     %v3 = affine.load %m[%i0] : memref<10xf32>
     %v4 = arith.mulf %v2, %v3 : f32
   }
+  gpu.dealloc %m : memref<10xf32>
   return
-// CHECK: %{{.*}} = arith.constant 0 : index
-// CHECK-NEXT: %{{.*}} = arith.constant 7.000000e+00 : f32
-// CHECK-NEXT: %{{.*}} = arith.constant 8.000000e+00 : f32
-// CHECK-NEXT: %{{.*}} = arith.constant 9.000000e+00 : f32
+// CHECK-NEXT: %[[C7:.*]] = arith.constant 7.000000e+00 : f32
+// CHECK-NEXT: arith.constant 8.000000e+00 : f32
+// CHECK-NEXT: %[[C9:.*]] = arith.constant 9.000000e+00 : f32
 // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
-// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32
-// CHECK-NEXT: %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32
+// CHECK-NEXT: arith.addf %[[C7]], %[[C7]] : f32
+// CHECK-NEXT: arith.mulf %[[C9]], %[[C9]] : f32
 // CHECK-NEXT: }
 // CHECK-NEXT: return
-
 }
 
 // The store-load forwarding can see through affine apply's since it relies on