diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgTraits.h @@ -351,10 +351,11 @@ class NamedStructuredOpTraits : public OpTrait::TraitBase { public: - llvm::Optional> referenceIterators(); - llvm::Optional> referenceIndexingMaps(); - std::function)> - emitScalarImplementation(); + static SmallVector referenceIterators(TypeRange inputTypes, + TypeRange outputTypes); + + static SmallVector referenceIndexingMaps(TypeRange inputTypes, + TypeRange outputTypes); }; } // namespace linalg diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -33,10 +33,9 @@ /// Forward declarations. template -static void buildNamedStructuredOpRegion(Builder &builder, - OperationState &result, - TypeRange operandTypes, - TypeRange tensorResultTypes); +static void buildNamedStructuredOpRegionAndAttributes( + Builder &builder, OperationState &result, TypeRange operandTypes, + TypeRange tensorResultTypes); template static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op); template @@ -1085,9 +1084,10 @@ //===----------------------------------------------------------------------===// template -void buildNamedStructuredOpRegion(Builder &builder, OperationState &result, - TypeRange operandTypes, - TypeRange tensorResultTypes) { +void buildNamedStructuredOpRegionAndAttributes(Builder &builder, + OperationState &result, + TypeRange operandTypes, + TypeRange tensorResultTypes) { Region ®ion = *result.addRegion(); Block *body = new Block(); // TODO: atm all operands go through getElementTypeOrSelf, @@ -1102,12 +1102,24 @@ opBuilder.setInsertionPointToStart(®ion.front()); mlir::edsc::ScopedContext scope(opBuilder, builder.getUnknownLoc()); NamedStructuredOpType::regionBuilder(*body); + + auto indexingMaps = builder.getAffineMapArrayAttr( + NamedStructuredOpType::referenceIndexingMaps(operandTypes, + tensorResultTypes)); + result.addAttribute(getIndexingMapsAttrName(), indexingMaps); + + auto iterators = + builder.getStrArrayAttr(NamedStructuredOpType::referenceIterators( + operandTypes, tensorResultTypes)); + result.addAttribute(getIteratorTypesAttrName(), iterators); } template static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op) { + SmallVector silentAttrNames{getIndexingMapsAttrName(), + getIteratorTypesAttrName()}; p << op.getOperationName() << ' '; - p.printOptionalAttrDict(op.getAttrs()); + p.printOptionalAttrDict(op.getAttrs(), silentAttrNames); p << ' ' << op.getOperands(); p << ": (" << op.getOperandTypes() << ")"; auto outputTensorTypes = op.getResultTypes(); @@ -1139,7 +1151,7 @@ if (!tensorResultTypes.empty()) result.addTypes(tensorResultTypes); - buildNamedStructuredOpRegion( + buildNamedStructuredOpRegionAndAttributes( parser.getBuilder(), result, operandTypes, tensorResultTypes); return parser.resolveOperands(operandsInfo, operandTypes, diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp @@ -95,7 +95,7 @@ Operation &terminator = block.back(); assert(isa(terminator) && - "expected an yield op in the end of the region"); + "expected a yield op in the end of the region"); for (unsigned i = 0, e = terminator.getNumOperands(); i < e; ++i) { std_store(map.lookupOrDefault(terminator.getOperand(i)), outputBuffers[i], ArrayRef{indexing[i].begin(), indexing[i].end()}); @@ -123,9 +123,36 @@ namespace { -// Generic loop emitter, to be specialized on an op-per op basis. -// TODO: Hook up to named ops interface and, later, retire when all named ops -// are auto-generated. +// Emits the MLIR for the scalar part of the generic op by: +// 1. Emitting std_load and std_store ops for each input and output +// view in order. This is achieved by applying the appropriate input or +// output map to the enclosing induction variables. +// 2. Emitting a call to `op.fun()` that takes as arguments the scalars +// from point 1. above. +// 3. Emitting std_store to store the results of 2. to the output +// views. +// +// An example output may resemble: +// +// ``` +// loop.for %i = %c0 to %0 step %c1 { +// loop.for %j = %c0 to %1 step %c1 { +// loop.for %k = %c0 to %4 step %c1 { +// %11 = load %arg0[%i, %j] : +// memref +// %12 = load %arg1[%i, %j, %k] : +// memref +// %13 = load %arg2[%i, %k, %j] : +// memref +// %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32) +// store %14#0, %arg1[%i, %j, %k] : +// memref +// store %14#1, %arg2[%i, %k, %j] : +// memref +// } +// } +// } +// ``` template class LinalgScopedEmitter { public: @@ -133,9 +160,41 @@ LinalgOpType linalgOp) { assert(linalgOp.hasBufferSemantics() && "expected linalg op with buffer semantics"); - llvm_unreachable("NYI"); - linalgOp.emitScalarImplementation()(ScopedContext::getBuilder(), - ScopedContext::getLocation(), allIvs); + auto b = ScopedContext::getBuilder(); + auto loc = ScopedContext::getLocation(); + unsigned nInputs = linalgOp.getNumInputs(); + unsigned nOutputs = linalgOp.getNumOutputs(); + SmallVector indexedValues(nInputs + nOutputs); + + // 1.a. Emit std_load from input views. + for (unsigned i = 0; i < nInputs; ++i) { + auto indexing = makeCanonicalAffineApplies( + b, loc, linalgOp.getInputIndexingMap(i), allIvs); + indexedValues[i] = std_load(linalgOp.getInput(i), indexing); + } + + // 1.b. Emit std_load from output views. + // TODO(mravishankar): Avoid the loads if the corresponding argument of the + // region has no uses. + for (unsigned i = 0; i < nOutputs; ++i) { + Value output = linalgOp.getOutputBuffer(i); + auto indexing = makeCanonicalAffineApplies( + b, loc, linalgOp.getOutputIndexingMap(i), allIvs); + indexedValues[nInputs + i] = std_load(output, indexing); + } + + // TODO(ntv): When a region inliner exists, use it. + // 2. Inline region, currently only works for a single basic block. + // 3. Emit std_store. + SmallVector, 8> indexing; + SmallVector outputBuffers; + for (unsigned i = 0; i < nOutputs; ++i) { + indexing.push_back(makeCanonicalAffineApplies( + b, loc, linalgOp.getOutputIndexingMap(i), allIvs)); + outputBuffers.push_back(linalgOp.getOutputBuffer(i)); + } + inlineRegionAndEmitStdStore(linalgOp, indexedValues, indexing, + outputBuffers); } }; @@ -344,81 +403,6 @@ } }; -// Emits the MLIR for the scalar part of the generic op by: -// 1. Emitting std_load and std_store ops for each input and output -// view in order. This is achieved by applying the appropriate input or -// output map to the enclosing induction variables. -// 2. Emitting a call to `op.fun()` that takes as arguments the scalars -// from point 1. above. -// 3. Emitting std_store to store the results of 2. to the output -// views. -// -// An example output may resemble: -// -// ``` -// loop.for %i = %c0 to %0 step %c1 { -// loop.for %j = %c0 to %1 step %c1 { -// loop.for %k = %c0 to %4 step %c1 { -// %11 = load %arg0[%i, %j] : -// memref -// %12 = load %arg1[%i, %j, %k] : -// memref -// %13 = load %arg2[%i, %k, %j] : -// memref -// %14:2 = call @foo(%11, %12, %13) : (f32, f32, f32) -> (f32, f32) -// store %14#0, %arg1[%i, %j, %k] : -// memref -// store %14#1, %arg2[%i, %k, %j] : -// memref -// } -// } -// } -// ``` -template -class LinalgScopedEmitter { -public: - static void emitScalarImplementation(ArrayRef allIvs, - GenericOp genericOp) { - assert(genericOp.hasBufferSemantics() && - "expected linalg op with buffer semantics"); - auto b = ScopedContext::getBuilder(); - auto loc = ScopedContext::getLocation(); - unsigned nInputs = genericOp.getNumInputs(); - unsigned nOutputs = genericOp.getNumOutputs(); - SmallVector indexedValues(nInputs + nOutputs); - - // 1.a. Emit std_load from input views. - for (unsigned i = 0; i < nInputs; ++i) { - auto indexing = makeCanonicalAffineApplies( - b, loc, genericOp.getInputIndexingMap(i), allIvs); - indexedValues[i] = std_load(genericOp.getInput(i), indexing); - } - - // 1.b. Emit std_load from output views. - // TODO(mravishankar): Avoid the loads if the corresponding argument of the - // region has no uses. - for (unsigned i = 0; i < nOutputs; ++i) { - Value output = genericOp.getOutputBuffer(i); - auto indexing = makeCanonicalAffineApplies( - b, loc, genericOp.getOutputIndexingMap(i), allIvs); - indexedValues[nInputs + i] = std_load(output, indexing); - } - - // TODO(ntv): When a region inliner exists, use it. - // 2. Inline region, currently only works for a single basic block. - // 3. Emit std_store. - SmallVector, 8> indexing; - SmallVector outputBuffers; - for (unsigned i = 0; i < nOutputs; ++i) { - indexing.push_back(makeCanonicalAffineApplies( - b, loc, genericOp.getOutputIndexingMap(i), allIvs)); - outputBuffers.push_back(genericOp.getOutputBuffer(i)); - } - inlineRegionAndEmitStdStore(genericOp, indexedValues, indexing, - outputBuffers); - } -}; - // Emits the MLIR for the scalar part of the indexed generic op by: // 1. Emitting std_load and std_store ops for each input and output view in // order. This is achieved by applying the appropriate input or output map diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -1,870 +1,64 @@ -// RUN: mlir-opt %s -convert-linalg-to-loops | FileCheck --check-prefix=CHECKLOOP %s -// RUN: mlir-opt %s -convert-linalg-to-parallel-loops | FileCheck --check-prefix=CHECKPARALLEL %s - -// Test that we can lower all the way to LLVM without crashing, don't check results here. -// RUN: mlir-opt %s --convert-linalg-to-llvm -o=/dev/null 2>&1 - -// CHECKLOOP-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECKLOOP-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECKLOOP-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -// CHECKLOOP-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> -// CHECKLOOP-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> - -// CHECKLOOP-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECKLOOP-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> -// CHECKLOOP-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> -// CHECKLOOP-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> - -// CHECKPARALLEL-DAG: #[[strided1D:.*]] = affine_map<(d0)[s0] -> (d0 + s0)> -// CHECKPARALLEL-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> -// CHECKPARALLEL-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -// CHECKPARALLEL-DAG: #[[strided4D:.*]] = affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3 + d3)> -// CHECKPARALLEL-DAG: #[[clampMinMap:.*]] = affine_map<(d0) -> (d0, 0)> - -// CHECKPARALLEL-DAG: #[[Stride1Dilation1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> -// CHECKPARALLEL-DAG: #[[Stride2Dilation1:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1)> -// CHECKPARALLEL-DAG: #[[Stride2Dilation4:.*]] = affine_map<(d0, d1) -> (d0 * 2 + d1 * 4)> -// CHECKPARALLEL-DAG: #[[Stride3Dilation5:.*]] = affine_map<(d0, d1) -> (d0 * 3 + d1 * 5)> - - -func @matmul(%arg0: memref, %M: index, %N: index, %K: index) { - %c0 = constant 0 : index - %c1 = constant 1 : index - %A = view %arg0[%c0][%M, %K] : memref to memref - %B = view %arg0[%c0][%K, %N] : memref to memref - %C = view %arg0[%c0][%M, %N] : memref to memref - linalg.matmul(%A, %B, %C) : memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @matmul(%{{.*}}: memref, -// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index -// CHECKLOOP-SAME: [[N:arg[0-9]+]]: index -// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[N]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @matmul(%{{.*}}: memref, -// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index -// CHECKPARALLEL-SAME: [[N:arg[0-9]+]]: index -// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index -// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[M]], %[[N]]) step (%{{.*}}, %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}, %{{.*}}] : memref - - - -func @matvec(%arg0: memref, %M: index, %N: index) { - %c0 = constant 0 : index - %c1 = constant 1 : index - %2 = view %arg0[%c0][%M, %N] : memref to memref - %3 = view %arg0[%c0][%M] : memref to memref - %4 = view %arg0[%c0][%N] : memref to memref - linalg.matvec(%2, %3, %4) : memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @matvec(%{{.*}}: memref, -// CHECKLOOP-SAME: [[M:arg[0-9]+]]: index -// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[M]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %[[C]][%{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @matvec(%{{.*}}: memref, -// CHECKPARALLEL-SAME: [[M:arg[0-9]+]]: index -// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index -// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[{{.*}}] : memref to memref -// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%[[M]]) step (%{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[C]][%{{.*}}] : memref - - -func @dot(%arg0: memref, %M: index) { - %c0 = constant 0 : index - %c1 = constant 1 : index - %1 = view %arg0[%c0][%M] : memref to memref - %2 = view %arg0[%c0][%M] : memref to memref - %3 = view %arg0[][] : memref to memref - linalg.dot(%1, %2, %3) : memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @dot(%{{.*}}: memref, -// CHECKLOOP-SAME: [[K:arg[0-9]+]]: index -// CHECKLOOP: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKLOOP: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKLOOP: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %[[C]][] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %[[C]][] : memref - -// CHECKPARALLEL-LABEL: func @dot(%{{.*}}: memref, -// CHECKPARALLEL-SAME: [[K:arg[0-9]+]]: index -// CHECKPARALLEL: %[[A:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKPARALLEL: %[[B:.*]] = std.view %{{.*}}[{{.*}}][{{.*}}] : memref to memref -// CHECKPARALLEL: %[[C:.*]] = std.view %{{.*}}[][] : memref to memref -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %[[A]][%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %[[B]][%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %[[C]][] : memref -// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %[[C]][] : memref - - -func @dot_view(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.dot(%arg0, %arg1, %arg2) : memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @dot_view( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[K:.*]] = dim %arg0, 0 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref -// CHECKLOOP-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP-DAG: %[[c:.*]] = load %{{.*}}[] : memref -// CHECKLOOP-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKLOOP: store %[[res]], %{{.*}}[] : memref - -// CHECKPARALLEL-LABEL: func @dot_view( -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 0 : memref -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKPARALLEL-DAG: %[[a:.*]] = load %arg0[%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[b:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL-DAG: %[[c:.*]] = load %{{.*}}[] : memref -// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[c]], %[[inc]] : f32 -// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref - -func @fill_view(%arg0: memref, %arg1: f32) { - linalg.fill(%arg0, %arg1) : memref, f32 - return -} -// CHECKLOOP-LABEL: func @fill_view( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @fill_view( -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref - -func @fill_view0(%arg0: memref, %arg1: f32) { - linalg.fill(%arg0, %arg1) : memref, f32 - return -} -// CHECKLOOP-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref - -// CHECKPARALLEL-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref - -func @fill_view3(%arg0: memref, %arg1: f32) { - linalg.fill(%arg0, %arg1) : memref, f32 - return -} -// CHECKLOOP-LABEL: func @fill_view3( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @fill_view3( -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: f32) { -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref - -func @copy_view(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) : memref, memref - return -} -// CHECKLOOP-LABEL: func @copy_view( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @copy_view( -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: loop.parallel (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { -// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}] : memref -// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}] : memref - -func @copy_view0(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) : memref, memref - return -} -// CHECKLOOP-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %{{.*}} = load %{{.*}}[] : memref -// CHECKLOOP: store %{{.*}}, %{{.*}}[] : memref - -// CHECKPARALLEL-LABEL: func @copy_view0(%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[] : memref -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref - -func @copy_view3(%arg0: memref, %arg1: memref) { - linalg.copy(%arg0, %arg1) {inputPermutation = affine_map<(i, j, k) -> (i, k, j)>, - outputPermutation = affine_map<(i, j, k) -> (k, j, i)>} : - memref, memref - return -} -// CHECKLOOP-LABEL: func @copy_view3 -// CHECKLOOP: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { -// CHECKLOOP: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @copy_view3 -// CHECKPARALLEL: (%{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: %[[L:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: store %[[L]], %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref - -func @conv_view3(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv(%arg0, %arg1, %arg2) {strides = [2]}: memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @conv_view3( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECKLOOP: %[[Q:.*]] = dim %arg0, 1 : memref -// CHECKLOOP: %[[K:.*]] = dim %arg0, 2 : memref -// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref -// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @conv_view3( -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 1 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 2 : memref -// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref -// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] : memref - -func @conv_view4(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.conv(%arg0, %arg1, %arg2) {dilations = [4, 5], strides = [2, 3]} : memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @conv_view4( -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECKLOOP: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECKLOOP: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECKLOOP: %[[K:.*]] = dim %arg0, 3 : memref -// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref -// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKLOOP: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @conv_view4( -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 3 : memref -// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref -// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #[[Stride2Dilation4]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #[[Stride3Dilation5]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[SUM0]], %[[SUM1]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref - - -func @conv_padding(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.conv(%arg0, %arg1, %arg2) {dilations = [1, 1], - padding = dense<[[0, 1], [1, 1]]> : tensor<2x2xi64>, - strides = [1, 1]} : - memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @conv_padding -// CHECKLOOP: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKLOOP: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECKLOOP: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECKLOOP: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECKLOOP: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECKLOOP: %[[K:.*]] = dim %arg0, 3 : memref -// CHECKLOOP: %[[B:.*]] = dim %arg1, 0 : memref -// CHECKLOOP: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[B]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[X1]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[K]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKLOOP: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) -// CHECKLOOP: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @conv_padding -// CHECKPARALLEL: %{{.*}}: memref, %{{.*}}: memref, %{{.*}}: memref) { -// CHECKPARALLEL: %[[ZERO:.*]] = constant 0.000000e+00 : f32 -// CHECKPARALLEL: %[[Z0:.*]] = dim %arg0, 0 : memref -// CHECKPARALLEL: %[[Z1:.*]] = dim %arg0, 1 : memref -// CHECKPARALLEL: %[[Q:.*]] = dim %arg0, 2 : memref -// CHECKPARALLEL: %[[K:.*]] = dim %arg0, 3 : memref -// CHECKPARALLEL: %[[B:.*]] = dim %arg1, 0 : memref -// CHECKPARALLEL: %[[X0:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: %[[X1:.*]] = dim %arg2, 2 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) to (%[[B]], %[[X0]], %[[X1]], %[[K]]) step (%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Q]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z0]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[Z1]] step %{{.*}} { -// CHECKPARALLEL: %[[SUM0:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[SUM1:.*]] = affine.apply #{{.*}}(%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IDX:.*]] = affine.max #[[clampMinMap]](%[[SUM0]]) -// CHECKPARALLEL: %[[IDY:.*]] = affine.max #[[clampMinMap]](%[[SUM1]]) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %[[IDX]], %[[IDY]], %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = mulf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = addf %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] : memref - -func @pooling_max(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.pooling_max(%arg0, %arg1, %arg2) { strides = [2, 1] }: - memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @pooling_max -// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @pooling_max -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref - -func @pooling_min(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.pooling_min(%arg0, %arg1, %arg2) { strides = [2, 1] }: - memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @pooling_min -// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKLOOP: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @pooling_min -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %{{.*}} = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKPARALLEL: %[[RES:.*]] = select %{{.*}}, %{{.*}}, %{{.*}} : f32 -// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref - -func @pooling_sum(%arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.pooling_sum(%arg0, %arg1, %arg2) { strides = [2, 1] }: - memref, memref, memref - return -} -// CHECKLOOP-LABEL: func @pooling_sum -// CHECKLOOP: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECKLOOP: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECKLOOP: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECKLOOP: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OX]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[OY]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKLOOP: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKLOOP: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKLOOP: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKLOOP: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKLOOP: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 -// CHECKLOOP: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref - -// CHECKPARALLEL-LABEL: func @pooling_sum -// CHECKPARALLEL: %[[WX:.*]] = dim %arg1, 0 : memref -// CHECKPARALLEL: %[[WY:.*]] = dim %arg1, 1 : memref -// CHECKPARALLEL: %[[OX:.*]] = dim %arg2, 0 : memref -// CHECKPARALLEL: %[[OY:.*]] = dim %arg2, 1 : memref -// CHECKPARALLEL: loop.parallel (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%[[OX]], %[[OY]]) step (%{{.*}}, %{{.*}}) { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WX]] step %{{.*}} { -// CHECKPARALLEL: loop.for %{{.*}} = %{{.*}} to %[[WY]] step %{{.*}} { -// CHECKPARALLEL: %[[IX:.*]] = affine.apply #[[Stride2Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[IY:.*]] = affine.apply #[[Stride1Dilation1]](%{{.*}}, %{{.*}}) -// CHECKPARALLEL: %[[RHS:.*]] = load %{{.*}}[%[[IX]], %[[IY]]] : memref -// CHECKPARALLEL: %[[LHS:.*]] = load %{{.*}}[%{{.*}}, %{{.*}}] : memref -// CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 -// CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref - -#accesses = [ - affine_map<(i, j, k) -> (i, j)>, - affine_map<(i, j, k) -> (i, j, k)>, - affine_map<(i, j, k) -> (i, k, j)> -] -#trait2 = { - args_in = 1, - args_out = 2, - iterator_types = ["parallel", "parallel", "parallel"], - indexing_maps = #accesses, - library_call = "some_external_function_name_2", - doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))" -} -func @generic_region(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.generic #trait2 %arg0, %arg1, %arg2 { - ^bb0(%a: f32, %b: f32, %c: f32): - %d = mulf %a, %b : f32 - %e = addf %c, %d : f32 - linalg.yield %d, %e : f32, f32 - }: memref, memref, memref - return -} -// CHECKLOOP-LABEL: @generic_region -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECKLOOP: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP: %[[e:.*]] = addf %[[c]], %[[d]] : f32 -// CHECKLOOP: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref - -// CHECKPARALLEL-LABEL: @generic_region -// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECKPARALLEL: %[[d:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL: %[[e:.*]] = addf %[[c]], %[[d]] : f32 -// CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref - -#trait4 = { - args_in = 1, - args_out = 2, - iterator_types = ["parallel", "parallel", "parallel"], - indexing_maps = #accesses, - library_call = "some_external_function_name_2", - doc = "B(i,j,k), C(i,k,j) = foo(A(i, j) * B(i,j,k), i * j * k + C(i,k,j))" -} -func @indexed_generic_region( - %arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.indexed_generic #trait4 %arg0, %arg1, %arg2 { - ^bb0(%i: index, %j: index, %k: index, %a: f32, %b: f32, %c: f32): - %result_1 = mulf %a, %b : f32 - - %ij = addi %i, %j : index - %ijk = addi %ij, %k : index - %ijk_int = index_cast %ijk : index to i32 - %ijk_float = sitofp %ijk_int : i32 to f32 - - %result_2 = addf %c, %ijk_float : f32 - linalg.yield %result_1, %result_2 : f32, f32 - }: memref, - memref, - memref - return -} - -// CHECKLOOP-LABEL: @indexed_generic_region -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] -// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] -// CHECKLOOP: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECKLOOP: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index -// CHECKLOOP: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 -// CHECKLOOP: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 -// CHECKLOOP: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 -// CHECKLOOP: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECKLOOP: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] - -// CHECKPARALLEL-LABEL: @indexed_generic_region -// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] -// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] -// CHECKPARALLEL: %[[result_1:.*]] = mulf %[[a]], %[[b]] : f32 -// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECKPARALLEL: %[[ijk:.*]] = addi %[[ij]], %[[k]] : index -// CHECKPARALLEL: %[[ijk_int:.*]] = index_cast %[[ijk]] : index to i32 -// CHECKPARALLEL: %[[ijk_float:.*]] = sitofp %[[ijk_int]] : i32 to f32 -// CHECKPARALLEL: %[[result_2:.*]] = addf %[[c]], %[[ijk_float]] : f32 -// CHECKPARALLEL: store %[[result_1]], %{{.*}}[%[[i]], %[[j]], %[[k]]] -// CHECKPARALLEL: store %[[result_2]], %{{.*}}[%[[i]], %[[k]], %[[j]]] - -// ----- - -#broadcast_access = [ - affine_map<(i, j) -> ()>, - affine_map<(i, j) -> (i, j)> -] - -#trait_broadcast = { - args_in = 1, - args_out = 1, - indexing_maps = #broadcast_access, - iterator_types = ["parallel", "parallel"], - library_call = "some_broadcast_external_fn" -} - -func @generic_op_zero_rank(%arg0: memref, %arg1: memref<3x4xf32>) -{ - linalg.generic #trait_broadcast %arg0, %arg1 { - ^bb(%a: f32, %b: f32) : - linalg.yield %a : f32 - } : memref, memref<3x4xf32> - return -} - -// CHECKLOOP-LABEL: @generic_op_zero_rank -// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][] -// CHECKLOOP: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] - -// CHECKPARALLEL-LABEL: @generic_op_zero_rank -// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xf32> -// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][] -// CHECKPARALLEL: store %[[a]], %[[ARG1]][%[[i]], %[[j]]] - -func @indexed_generic_op_zero_rank(%arg0: memref, %arg1: memref<3x4xi32>) -{ - linalg.indexed_generic #trait_broadcast %arg0, %arg1 { - ^bb(%i: index, %j: index, %a: i32, %b: i32) : - %ij = addi %i, %j : index - %ij_int = index_cast %ij : index to i32 - %result = addi %a, %ij_int : i32 - linalg.yield %result : i32 - } : memref, memref<3x4xi32> - return -} - -// CHECKLOOP-LABEL: @indexed_generic_op_zero_rank -// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][ -// CHECKLOOP: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECKLOOP: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 -// CHECKLOOP: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 -// CHECKLOOP: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] - -// CHECKPARALLEL-LABEL: @indexed_generic_op_zero_rank -// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref<3x4xi32> -// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][ -// CHECKPARALLEL: %[[ij:.*]] = addi %[[i]], %[[j]] : index -// CHECKPARALLEL: %[[ij_int:.*]] = index_cast %[[ij]] : index to i32 -// CHECKPARALLEL: %[[result:.*]] = addi %[[a]], %[[ij_int]] : i32 -// CHECKPARALLEL: store %[[result]], %[[ARG1]][%[[i]], %[[j]]] - -#reduce_1D_access = [ - affine_map<(i) -> (i)>, - affine_map<(i) -> ()> -] - -#trait_reduce_1D = { - args_in = 1, - args_out = 1, - indexing_maps = #reduce_1D_access, - iterator_types = ["reduction"], - library_call = "some_reduce_external_fn" -} - -func @generic_op_1D_reduce(%arg0: memref, %arg1: memref) -{ - linalg.generic #trait_reduce_1D %arg0, %arg1 { - ^bb(%a: f32, %b: f32) : - %0 = addf %a, %b : f32 - linalg.yield %0 : f32 - } : memref, memref - return -} -// CHECKLOOP-LABEL: @generic_op_1D_reduce -// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][] -// CHECKLOOP: %[[c:.*]] = addf %[[a]], %[[b]] : f32 -// CHECKLOOP: store %[[c]], %[[ARG1]][] - -// CHECKPARALLEL-LABEL: @generic_op_1D_reduce -// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}} -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][] -// CHECKPARALLEL: %[[c:.*]] = addf %[[a]], %[[b]] : f32 -// CHECKPARALLEL: store %[[c]], %[[ARG1]][] - - -#reduce_init_1D_access = [ - affine_map<(i) -> (i)>, - affine_map<(i) -> ()>, - affine_map<(i) -> ()> -] - -#trait_reduce_init_1D = { - args_in = 2, - args_out = 1, - indexing_maps = #reduce_init_1D_access, - iterator_types = ["reduction"], - library_call = "some_reduce_external_fn" -} - -func @indexed_generic_op_1D_reduce(%arg0: memref, - %arg1: memref, - %arg2: memref) -{ - linalg.indexed_generic #trait_reduce_init_1D %arg0, %arg1, %arg2 { - ^bb(%i : index, %a: f32, %b: f32, %c: f32) : - %0 = constant 0 : index - %1 = cmpi "eq", %0, %i : index - %2 = select %1, %b, %c : f32 - %3 = addf %a, %2 : f32 - linalg.yield %3 : f32 - } : memref, memref, memref - return -} -// CHECKLOOP-LABEL: @indexed_generic_op_1D_reduce -// CHECKLOOP-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKLOOP: %[[b:.*]] = load %[[ARG1]][] -// CHECKLOOP: %[[c:.*]] = load %[[ARG2]][] -// CHECKLOOP: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] -// CHECKLOOP: %[[e:.*]] = addf %[[a]], %[[d]] -// CHECKLOOP: store %[[e]], %[[ARG2]][] - -// CHECKPARALLEL-LABEL: @indexed_generic_op_1D_reduce -// CHECKPARALLEL-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECKPARALLEL: loop.for %[[i:.*]] = {{.*}} -// CHECKPARALLEL: %[[a:.*]] = load %[[ARG0]][%[[i]]] -// CHECKPARALLEL: %[[b:.*]] = load %[[ARG1]][] -// CHECKPARALLEL: %[[c:.*]] = load %[[ARG2]][] -// CHECKPARALLEL: %[[d:.*]] = select %{{.*}}, %[[b]], %[[c]] -// CHECKPARALLEL: %[[e:.*]] = addf %[[a]], %[[d]] -// CHECKPARALLEL: store %[[e]], %[[ARG2]][] - -#trait_const_fill = { - args_in = 0, - args_out = 1, - indexing_maps = [affine_map<(i) -> (i)>], - iterator_types = ["parallel"], - library_call = "some_external_fn" -} -func @generic_const_init(%arg0: memref) { - %cst = constant 1.0 : f32 - linalg.generic #trait_const_fill %arg0 { - ^bb0(%arg1: f32): // no predecessors - linalg.yield %cst : f32 - }: memref - return -} -// CHECKLOOP-LABEL: @generic_const_init -// CHECKLOOP-SAME: %[[ARG0:.*]]: memref -// CHECKLOOP: %[[CONST:.*]] = constant 1.000000e+00 : f32 -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: store %[[CONST]], %[[ARG0]] - -// CHECKPARALLEL-LABEL: @generic_const_init -// CHECKPARALLEL-SAME: %[[ARG0:.*]]: memref -// CHECKPARALLEL: %[[CONST:.*]] = constant 1.000000e+00 : f32 -// CHECKPARALLEL: loop.parallel (%[[i:.*]]) -// CHECKPARALLEL: store %[[CONST]], %[[ARG0]] - -#scalar_access = [ - affine_map<() -> ()>, - affine_map<() -> ()>, - affine_map<() -> ()> -] -#scalar_trait = { - args_in = 2, - args_out = 1, - iterator_types = [], - indexing_maps = #scalar_access, - library_call = "some_external_fn" -} -func @scalar_code(%arg0: memref, %arg1 : memref, %arg2 : memref) -{ - linalg.generic #scalar_trait %arg0, %arg1, %arg2 { - ^bb(%a : f32, %b : f32, %c : f32) : - %0 = addf %a, %b : f32 - linalg.yield %0 : f32 - } : memref, memref, memref - return -} -// CHECKLOOP-LABEL: @scalar_code -// CHECKLOOP-SAME: %[[ARG0]]: memref -// CHECKLOOP-SAME: %[[ARG1]]: memref -// CHECKLOOP-SAME: %[[ARG2]]: memref -// CHECKLOOP-NOT: loop.for -// CHECKLOOP-DAG: load %[[ARG0]][] -// CHECKLOOP-DAG: load %[[ARG1]][] -// CHECKLOOP: addf -// CHECKLOOP: store %{{.*}}, %[[ARG2]][] - -// CHECKPARALLEL-LABEL: @scalar_code -// CHECKPARALLEL-SAME: %[[ARG0]]: memref -// CHECKPARALLEL-SAME: %[[ARG1]]: memref -// CHECKPARALLEL-SAME: %[[ARG2]]: memref -// CHECKPARALLEL-NOT: loop.for -// CHECKPARALLEL-DAG: load %[[ARG0]][] -// CHECKPARALLEL-DAG: load %[[ARG1]][] -// CHECKPARALLEL: addf -// CHECKPARALLEL: store %{{.*}}, %[[ARG2]][] +// RUN: mlir-opt %s -convert-linalg-to-affine-loops +// | FileCheck --check-prefix=CHECKAFFINE %s + +//----------------------------------------------------------------------------// +// Named ops to loops. +//----------------------------------------------------------------------------// +func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { + linalg.batch_matmul %A, %B, %C : (memref, memref, memref) -> () + return +} +// CHECKLOOP-LABEL: @named_batch_matmul +// CHECKLOOP-SAME: %[[mA:[a-zA-Z0-9]+]]: memref +// CHECKLOOP-SAME: %[[mB:[a-zA-Z0-9]+]]: memref +// CHECKLOOP-SAME: %[[mC:[a-zA-Z0-9]+]]: memref +// CHECKLOOP: %[[B:.*]] = dim %[[mA]], 0 : memref +// CHECKLOOP: %[[M:.*]] = dim %[[mA]], 1 : memref +// CHECKLOOP: %[[K:.*]] = dim %[[mA]], 2 : memref +// CHECKLOOP: %[[N:.*]] = dim %[[mB]], 2 : memref +// CHECKLOOP: loop.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKLOOP: loop.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKLOOP: loop.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} { +// CHECKLOOP: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKLOOP-DAG: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKLOOP-DAG: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKLOOP-DAG: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKLOOP-DAG: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 +// CHECKLOOP-DAG: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKLOOP: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref + +// CHECKAFFINE-LABEL: @named_batch_matmul +// CHECKAFFINE-SAME: %[[mA:[a-zA-Z0-9]+]]: memref +// CHECKAFFINE-SAME: %[[mB:[a-zA-Z0-9]+]]: memref +// CHECKAFFINE-SAME: %[[mC:[a-zA-Z0-9]+]]: memref +// CHECKAFFINE: %[[B:.*]] = dim %[[mA]], 0 : memref +// CHECKAFFINE: %[[M:.*]] = dim %[[mA]], 1 : memref +// CHECKAFFINE: %[[K:.*]] = dim %[[mA]], 2 : memref +// CHECKAFFINE: %[[N:.*]] = dim %[[mB]], 2 : memref +// CHECKAFFINE: loop.for %[[b:.*]] = %{{.*}} to %[[B]] step %{{.*}} { +// CHECKAFFINE: loop.for %[[m:.*]] = %{{.*}} to %[[M]] step %{{.*}} { +// CHECKAFFINE: loop.for %[[n:.*]] = %{{.*}} to %[[N]] step %{{.*}} { +// CHECKAFFINE: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKAFFINE-DAG: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKAFFINE-DAG: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKAFFINE-DAG: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKAFFINE-DAG: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 +// CHECKAFFINE-DAG: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKAFFINE: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref + +// CHECKPARALLEL-LABEL: @named_batch_matmul +// CHECKPARALLEL-SAME: %[[mA:[a-zA-Z0-9]+]]: memref +// CHECKPARALLEL-SAME: %[[mB:[a-zA-Z0-9]+]]: memref +// CHECKPARALLEL-SAME: %[[mC:[a-zA-Z0-9]+]]: memref +// CHECKPARALLEL: %[[B:.*]] = dim %[[mA]], 0 : memref +// CHECKPARALLEL: %[[M:.*]] = dim %[[mA]], 1 : memref +// CHECKPARALLEL: %[[K:.*]] = dim %[[mA]], 2 : memref +// CHECKPARALLEL: %[[N:.*]] = dim %[[mB]], 2 : memref +// CHECKPARALLEL: loop.parallel (%[[b:.*]], %[[m:.*]], %[[n:.*]]) = ({{.*}}) to (%[[B]], %[[M]], %[[N]]) step ({{.*}}) { +// CHECKPARALLEL: loop.for %[[k:.*]] = %{{.*}} to %[[K]] step %{{.*}} { +// CHECKPARALLEL-DAG: %[[va:.*]] = load %[[mA]][%[[b]], %[[m]], %[[k]]] : memref +// CHECKPARALLEL-DAG: %[[vb:.*]] = load %[[mB]][%[[b]], %[[k]], %[[n]]] : memref +// CHECKPARALLEL-DAG: %[[vc:.*]] = load %[[mC]][%[[b]], %[[m]], %[[n]]] : memref +// CHECKPARALLEL-DAG: %[[inc:.*]] = mulf %[[va]], %[[vb]] : f32 +// CHECKPARALLEL-DAG: %[[res:.*]] = addf %[[vc]], %[[inc]] : f32 +// CHECKPARALLEL: store %[[res]], %[[mC]][%[[b]], %[[m]], %[[n]]] : memref diff --git a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc --- a/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc +++ b/mlir/test/mlir-linalg-ods-gen/test-linalg-ods-gen.tc @@ -7,15 +7,15 @@ // ODS-NEXT: NamedStructuredOpTraits // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // -// IMPL-LABEL: Test1Op::referenceIterators() { -// IMPL-NEXT: { {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } +// IMPL-LABEL: SmallVector Test1Op::referenceIterators +// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } // -// IMPL: Test1Op::referenceIndexingMaps() { +// IMPL: SmallVector Test1Op::referenceIndexingMaps // IMPL: AffineMap::get(2, 0, {d0, d1}, context), // IMPL-NEXT: AffineMap::get(2, 0, {d1}, context), // IMPL-NEXT: AffineMap::get(2, 0, {d0}, context) }; // -// IMPL: Test1Op::regionBuilder(Block &block) { +// IMPL: void Test1Op::regionBuilder(Block &block) { // IMPL: Value [[a:.*]](args[0]), [[b:.*]](args[1]), [[c:.*]](args[2]); // IMPL: Value [[d:.*]] = std_mulf([[a]], [[b]]); // IMPL: Value [[e:.*]] = std_addf([[c]], [[d]]); @@ -32,10 +32,10 @@ // ODS-NEXT: NamedStructuredOpTraits // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // -// IMPL-LABEL: Test2Op::referenceIterators() { -// IMPL-NEXT: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } +// IMPL-LABEL: SmallVector Test2Op::referenceIterators +// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } // -// IMPL: Test2Op::referenceIndexingMaps() { +// IMPL: SmallVector Test2Op::referenceIndexingMaps // IMPL: AffineMap::get(3, 0, {d0, d2}, context), // IMPL-NEXT: AffineMap::get(3, 0, {d2, d1}, context), // IMPL-NEXT: AffineMap::get(3, 0, {d0, d1}, context) }; @@ -57,10 +57,10 @@ // ODS-NEXT: NamedStructuredOpTraits // ODS-NEXT: SingleBlockImplicitTerminator<"YieldOp"> // -// IMPL-LABEL: Test3Op::referenceIterators() { -// IMPL-NEXT: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } +// IMPL-LABEL: SmallVector Test3Op::referenceIterators +// IMPL: { {{.*}}Parallel{{.*}}, {{.*}}Parallel{{.*}}, {{.*}}Reduction{{.*}} } // -// IMPL: Test3Op::referenceIndexingMaps() { +// IMPL: SmallVector Test3Op::referenceIndexingMaps // IMPL: AffineMap::get(4, 0, {d0, d1, d3}, context), // IMPL-NEXT: AffineMap::get(4, 0, {d3, d2}, context), // IMPL-NEXT: AffineMap::get(4, 0, {d0, d1, d2}, context) }; diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-gen.cpp @@ -1472,7 +1472,7 @@ [{{ result.addOperands(views); result.addTypes(outputTypes); - buildNamedStructuredOpRegion<{0}>( + buildNamedStructuredOpRegionAndAttributes<{0}>( b, result, TypeRange(views), outputTypes); }]> ]; @@ -1481,7 +1481,13 @@ }]; let extraClassDeclaration = [{{ llvm::Optional> referenceIterators(); + static SmallVector referenceIterators( + TypeRange inputTypes, TypeRange outputTypes); + llvm::Optional> referenceIndexingMaps(); + static SmallVector referenceIndexingMaps( + TypeRange inputTypes, TypeRange outputTypes); + static void regionBuilder(Block &block); }]; })FMT"; @@ -1503,7 +1509,13 @@ ComprehensionParsingState &state) { const char *referenceReferenceIteratorsFmt = R"FMT( - llvm::Optional> {0}::referenceIterators() { + // This is temporary until we transition out of manually specified ops + // that should be auto-generated with linalg-ods-gen. + llvm::Optional> {0}::referenceIterators() {{ + llvm_unreachable("Unexpected missing `iterator_types` attribute."); + } + SmallVector {0}::referenceIterators( + TypeRange inputTypes, TypeRange outputTypes) { return SmallVector{{ {1} }; })FMT"; @@ -1538,8 +1550,15 @@ ComprehensionParsingState &state) { const char *referenceIndexingMapsFmt = R"FMT( - llvm::Optional> {0}::referenceIndexingMaps() { - MLIRContext *context = getContext(); + // This is temporary until we transition out of manually specified ops that + // should be auto-generated with linalg-ods-gen. + llvm::Optional> {0}::referenceIndexingMaps() {{ + llvm_unreachable("Unexpected missing `indexing_maps` attribute."); + } + SmallVector {0}::referenceIndexingMaps( + TypeRange inputTypes, TypeRange outputTypes) { + assert(!inputTypes.empty() && "At least one input expected"); + MLIRContext *context = (*inputTypes.begin()).getContext(); AffineExpr {1}; bindDims(context, {1}); return SmallVector{{ {2} }; @@ -1555,7 +1574,7 @@ std::string mapsStr; llvm::raw_string_ostream mapsStringStream(mapsStr); SmallVector orderedUses(state.orderedTensorArgs.size()); - for (auto it : state.orderedTensorArgs) + for (const auto &it : state.orderedTensorArgs) orderedUses[it.second] = it.first; llvm::interleaveComma(orderedUses, mapsStringStream, [&](TensorUse u) { assert(u.indexingMap);