diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp
@@ -2152,17 +2152,28 @@
   OpBuilder b(forOp);
   Location loc(forOp.getLoc());
-  Value mul = processorId.front();
-  for (unsigned i = 1, e = processorId.size(); i < e; ++i)
-    mul = b.create<AddIOp>(loc, b.create<MulIOp>(loc, mul, numProcessors[i]),
-                           processorId[i]);
-  Value lb = b.create<AddIOp>(loc, forOp.lowerBound(),
-                              b.create<MulIOp>(loc, forOp.step(), mul));
+  AffineExpr lhs, rhs;
+  bindSymbols(forOp.getContext(), lhs, rhs);
+  auto mulMap = AffineMap::get(0, 2, lhs * rhs);
+  auto addMap = AffineMap::get(0, 2, lhs + rhs);
+
+  Value linearIndex = processorId.front();
+  for (unsigned i = 1, e = processorId.size(); i < e; ++i) {
+    auto mulApplyOp = b.create<AffineApplyOp>(
+        loc, mulMap, ValueRange{linearIndex, numProcessors[i]});
+    linearIndex = b.create<AffineApplyOp>(
+        loc, addMap, ValueRange{mulApplyOp, processorId[i]});
+  }
+
+  auto mulApplyOp = b.create<AffineApplyOp>(
+      loc, mulMap, ValueRange{linearIndex, forOp.step()});
+  Value lb = b.create<AffineApplyOp>(
+      loc, addMap, ValueRange{mulApplyOp, forOp.lowerBound()});
   forOp.setLowerBound(lb);
 
   Value step = forOp.step();
   for (auto numProcs : numProcessors)
-    step = b.create<MulIOp>(loc, step, numProcs);
+    step = b.create<AffineApplyOp>(loc, mulMap, ValueRange{numProcs, step});
   forOp.setStep(step);
 }
diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
@@ -175,23 +175,28 @@
 
 // -----
 
-// CHECK-LABEL: func @matmul_tensors(
+// CHECK: #[[MULMAP:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+// CHECK: #[[ADDMAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
+// CHECK: func @matmul_tensors(
 // CHECK-SAME: %[[TA:[0-9a-z]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[TB:[0-9a-z]+]]: tensor<?x?xf32>
 // CHECK-SAME: %[[TC:[0-9a-z]+]]: tensor<?x?xf32>) -> tensor<?x?xf32> {
 func @matmul_tensors(
   %arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>, %arg2: tensor<?x?xf32>)
     -> tensor<?x?xf32> {
-// CHECK: %[[C8:.*]] = constant 8 : index
+// CHECK-DAG: %[[C8:.*]] = constant 8 : index
+// CHECK-DAG: %[[C0:.*]] = constant 0 : index
 // CHECK: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"}
 // CHECK: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"}
 // CHECK: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"}
 // CHECK: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"}
-// CHECK: %[[LBY:.*]] = muli %[[BIDY]], %[[C8]] : index
-// CHECK: %[[STEPY:.*]] = muli %[[NBLOCKSY]], %[[C8]] : index
+// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]]
+// CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
+// CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]]
 // CHECK: %[[TD0:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC0:.*]] = %[[TC]]) -> (tensor<?x?xf32>) {
-// CHECK: %[[LBX:.*]] = muli %[[BIDX]], %[[C8]] : index
-// CHECK: %[[STEPX:.*]] = muli %[[NBLOCKSX]], %[[C8]] : index
+// CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDX]], %[[C8]]]
+// CHECK: %[[LBX:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]]
+// CHECK: %[[STEPX:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSX]], %[[C8]]]
 // CHECK: %[[TD1:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC1:.*]] = %[[TC0]]) -> (tensor<?x?xf32>) {
 // CHECK: %[[TD2:.*]] = scf.for {{.*}} to {{.*}} step {{.*}} iter_args(%[[TC2:.*]] = %[[TC1]]) -> (tensor<?x?xf32>) {
 // CHECK: %[[sTA:.*]] = subtensor %[[TA]][{{.*}}] : tensor<?x?xf32> to tensor<?x?xf32>
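Note on the LoopUtils.cpp hunk above: the arithmetic is unchanged — the processor ids are linearized as `id = (...(id0 * n1 + id1) * n2 + id2)...`, the new lower bound is `lb + step * id`, and the new step is `step * n0 * ... * nk` — but it is now emitted as `affine.apply` over two-symbol maps instead of `muli`/`addi`, so the expressions remain visible to affine folding and composition. A minimal self-contained sketch of that builder pattern (the helper name `buildSymbolicMul` is hypothetical; `bindSymbols`, `AffineMap::get`, and `AffineApplyOp` are the MLIR APIs the patch itself uses):

```cpp
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"

using namespace mlir;

// Hypothetical helper: emits
//   affine.apply affine_map<()[s0, s1] -> (s0 * s1)>()[%x, %y]
// for two given index-typed values.
static Value buildSymbolicMul(OpBuilder &b, Location loc, Value x, Value y) {
  AffineExpr lhs, rhs;
  bindSymbols(b.getContext(), lhs, rhs); // binds s0, s1
  AffineMap mulMap =
      AffineMap::get(/*dimCount=*/0, /*symbolCount=*/2, lhs * rhs);
  return b.create<AffineApplyOp>(loc, mulMap, ValueRange{x, y});
}
```

Applied to two `index` values this produces exactly the `affine.apply #map()[%x, %y]` form that the updated tests below check for.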
diff --git a/mlir/test/Transforms/parametric-mapping.mlir b/mlir/test/Transforms/parametric-mapping.mlir
--- a/mlir/test/Transforms/parametric-mapping.mlir
+++ b/mlir/test/Transforms/parametric-mapping.mlir
@@ -1,21 +1,25 @@
 // RUN: mlir-opt -allow-unregistered-dialect -test-mapping-to-processing-elements %s | FileCheck %s
 
-// CHECK-LABEL: @map1d
-// CHECK: (%[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index) {
+// CHECK: #[[mul_map:.+]] = affine_map<()[s0, s1] -> (s0 * s1)>
+// CHECK: #[[add_map:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
+
+// CHECK: func @map1d
+// CHECK-SAME: (%[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
 func @map1d(%lb: index, %ub: index, %step: index) {
   // CHECK: %[[threads:.*]]:2 = "new_processor_id_and_range"() : () -> (index, index)
   %0:2 = "new_processor_id_and_range"() : () -> (index, index)
 
-  // CHECK: %[[thread_offset:.*]] = muli %[[step]], %[[threads]]#0
-  // CHECK: %[[new_lb:.*]] = addi %[[lb]], %[[thread_offset]]
-  // CHECK: %[[new_step:.*]] = muli %[[step]], %[[threads]]#1
+  // CHECK: %[[thread_offset:.+]] = affine.apply #[[mul_map]]()[%[[threads]]#0, %[[step]]]
+  // CHECK: %[[new_lb:.+]] = affine.apply #[[add_map]]()[%[[thread_offset]], %[[lb]]]
+  // CHECK: %[[new_step:.+]] = affine.apply #[[mul_map]]()[%[[threads]]#1, %[[step]]]
+  // CHECK: scf.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
   scf.for %i = %lb to %ub step %step {}
   return
 }
 
-// CHECK-LABEL: @map2d
-// CHECK: (%[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index) {
+// CHECK: func @map2d
+// CHECK-SAME: (%[[lb:.*]]: index, %[[ub:.*]]: index, %[[step:.*]]: index)
 func @map2d(%lb : index, %ub : index, %step : index) {
   // CHECK: %[[blocks:.*]]:2 = "new_processor_id_and_range"() : () -> (index, index)
   %0:2 = "new_processor_id_and_range"() : () -> (index, index)
@@ -24,24 +28,25 @@
   %1:2 = "new_processor_id_and_range"() : () -> (index, index)
 
   // blockIdx.x * blockDim.x
-  // CHECK: %[[bidxXbdimx:.*]] = muli %[[blocks]]#0, %[[threads]]#1 : index
+  // CHECK: %[[bidxXbdimx:.+]] = affine.apply #[[mul_map]]()[%[[blocks]]#0, %[[threads]]#1]
   //
   // threadIdx.x + blockIdx.x * blockDim.x
-  // CHECK: %[[tidxpbidxXbdimx:.*]] = addi %[[bidxXbdimx]], %[[threads]]#0 : index
+  // CHECK: %[[tidxpbidxXbdimx:.+]] = affine.apply #[[add_map]]()[%[[bidxXbdimx]], %[[threads]]#0]
   //
   // thread_offset = step * (threadIdx.x + blockIdx.x * blockDim.x)
-  // CHECK: %[[thread_offset:.*]] = muli %[[step]], %[[tidxpbidxXbdimx]] : index
+  // CHECK: %[[thread_offset:.+]] = affine.apply #[[mul_map]]()[%[[tidxpbidxXbdimx]], %[[step]]]
   //
   // new_lb = lb + thread_offset
-  // CHECK: %[[new_lb:.*]] = addi %[[lb]], %[[thread_offset]] : index
+  // CHECK: %[[new_lb:.+]] = affine.apply #[[add_map]]()[%[[thread_offset]], %[[lb]]]
   //
   // stepXgdimx = step * gridDim.x
-  // CHECK: %[[stepXgdimx:.*]] = muli %[[step]], %[[blocks]]#1 : index
+  // CHECK: %[[stepXgdimx:.+]] = affine.apply #[[mul_map]]()[%[[blocks]]#1, %[[step]]]
   //
   // new_step = step * gridDim.x * blockDim.x
-  // CHECK: %[[new_step:.*]] = muli %[[stepXgdimx]], %[[threads]]#1 : index
+  // CHECK: %[[new_step:.+]] = affine.apply #[[mul_map]]()[%[[threads]]#1, %[[stepXgdimx]]]
   //
   // CHECK: scf.for %{{.*}} = %[[new_lb]] to %[[ub]] step %[[new_step]] {
+
   scf.for %i = %lb to %ub step %step {}
   return
 }
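The `map2d` expectations above encode the usual GPU linearization: `new_lb = lb + step * (tid + bid * bdim)` and `new_step = step * gdim * bdim`. A small host-side model of that arithmetic, handy for sanity-checking the CHECK lines (the `Mapped` and `map2d` names here are illustrative only, not part of the patch):

```cpp
#include <cassert>
#include <cstdint>

// Model of the loop-bound rewrite the test checks: each processing element
// starts at lb + step * linear_id and strides by step * (total PE count).
struct Mapped {
  int64_t lb, step;
};

static Mapped map2d(int64_t lb, int64_t step, int64_t bid, int64_t gdim,
                    int64_t tid, int64_t bdim) {
  int64_t linearId = tid + bid * bdim; // threadIdx.x + blockIdx.x * blockDim.x
  return {lb + step * linearId, step * gdim * bdim};
}

int main() {
  // 2 blocks of 4 threads over [0, ub) step 1: thread (bid=1, tid=2) starts
  // at 6 and strides by 8, so the 8 threads tile the space without overlap.
  Mapped m = map2d(/*lb=*/0, /*step=*/1, /*bid=*/1, /*gdim=*/2,
                   /*tid=*/2, /*bdim=*/4);
  assert(m.lb == 6 && m.step == 8);
  return 0;
}
```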
diff --git a/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp b/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp
--- a/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp
+++ b/mlir/test/lib/Transforms/TestGpuMemoryPromotion.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -30,7 +31,7 @@
     : public PassWrapper<TestGpuMemoryPromotionPass,
                          OperationPass<gpu::GPUFuncOp>> {
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<StandardOpsDialect, scf::SCFDialect>();
+    registry.insert<AffineDialect, StandardOpsDialect, scf::SCFDialect>();
   }
 
   void runOnOperation() override {
diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Transforms/TestLoopMapping.cpp
--- a/mlir/test/lib/Transforms/TestLoopMapping.cpp
+++ b/mlir/test/lib/Transforms/TestLoopMapping.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
@@ -27,6 +28,10 @@
 public:
   explicit TestLoopMappingPass() {}
 
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<AffineDialect>();
+  }
+
   void runOnFunction() override {
     FuncOp func = getFunction();
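Both test-pass changes follow from MLIR's dependent-dialect rule: a pass must declare, via `getDependentDialects`, every dialect whose ops it may create, so the context loads them before (possibly multi-threaded) pass execution. Since the distribution utilities now build `affine.apply`, their test drivers must register `AffineDialect`. A minimal sketch of the pattern (the pass name is hypothetical; the hook and `registry.insert<>()` match the MLIR API of this revision):

```cpp
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Pass/Pass.h"

namespace {
// Hypothetical pass showing the dependent-dialect declaration needed once a
// transform starts creating affine.apply ops.
struct ExampleAffineUserPass
    : public mlir::PassWrapper<ExampleAffineUserPass, mlir::FunctionPass> {
  void getDependentDialects(mlir::DialectRegistry &registry) const override {
    // Without this, creating AffineApplyOp can fail at pass-execution time
    // because AffineDialect was never loaded into the MLIRContext.
    registry.insert<mlir::AffineDialect>();
  }
  void runOnFunction() override {}
};
} // namespace
```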