diff --git a/mlir/include/mlir/Dialect/GPU/GPUBase.td b/mlir/include/mlir/Dialect/GPU/GPUBase.td --- a/mlir/include/mlir/Dialect/GPU/GPUBase.td +++ b/mlir/include/mlir/Dialect/GPU/GPUBase.td @@ -13,6 +13,7 @@ #ifndef GPU_BASE #define GPU_BASE +include "mlir/IR/AttrTypeBase.td" include "mlir/IR/OpBase.td" //===----------------------------------------------------------------------===// @@ -117,4 +118,13 @@ ]; } +//===----------------------------------------------------------------------===// +// GPU Attributes. +//===----------------------------------------------------------------------===// + +class GPU_Attr traits = []> + : AttrDef { + let mnemonic = attrMnemonic; +} + #endif // GPU_BASE diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -15,6 +15,7 @@ include "mlir/Dialect/DLTI/DLTIBase.td" include "mlir/Dialect/GPU/GPUBase.td" +include "mlir/Dialect/GPU/ParallelLoopMapperAttr.td" include "mlir/IR/EnumAttr.td" include "mlir/IR/FunctionInterfaces.td" include "mlir/IR/SymbolInterfaces.td" diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -14,11 +14,9 @@ #ifndef MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H #define MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H -#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/Dialect/GPU/GPUDialect.h" #include "mlir/Support/LLVM.h" -#include "llvm/ADT/DenseMap.h" - -#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.h.inc" +#include "llvm/ADT/StringRef.h" namespace mlir { @@ -29,8 +27,6 @@ } // namespace mlir -#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.h.inc" - namespace mlir { namespace scf { class ParallelOp; @@ -41,24 +37,13 @@ /// Name of the mapping attribute produced by loop mappers. 
StringRef getMappingAttrName(); -/// Get the value of the processor in the ParallelLoopDimMapping attribute. -inline Processor getProcessor(ParallelLoopDimMapping attr) { - return static_cast<Processor>(attr.processor().getInt()); -} - -/// Helper function to create a ParallelDimMapperAttr. -/// TODO: Replace its uses with an auto-gened method. -ParallelLoopDimMapping getParallelLoopDimMappingAttr(Processor processor, - AffineMap map, - AffineMap bound); - /// Sets the mapping attribute of a scf.parallel operation. Verifies that the /// mapping passed is valid. /// - the number of DimMapperAttr provided is same as the number of loops of /// the `ploopOp`. /// - the mapping does not map multiple loops to the same processor. LogicalResult setMappingAttr(scf::ParallelOp ploopOp, - ArrayRef<ParallelLoopDimMapping> mapping); + ArrayRef<ParallelLoopDimMappingAttr> mapping); } // namespace gpu } // namespace mlir #endif // MLIR_DIALECT_GPU_PARALLELLOOPMAPPER_H diff --git a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td --- a/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td +++ b/mlir/include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td @@ -17,15 +17,15 @@ include "mlir/Dialect/GPU/GPUBase.td" include "mlir/IR/EnumAttr.td" -def BlockX : I64EnumAttrCase<"BlockX", 0>; -def BlockY : I64EnumAttrCase<"BlockY", 1>; -def BlockZ : I64EnumAttrCase<"BlockZ", 2>; -def ThreadX : I64EnumAttrCase<"ThreadX", 3>; -def ThreadY : I64EnumAttrCase<"ThreadY", 4>; -def ThreadZ : I64EnumAttrCase<"ThreadZ", 5>; -def Sequential : I64EnumAttrCase<"Sequential", 6>; - -def ProcessorAttr : I64EnumAttr<"Processor", "processor for loop mapping", [ +def BlockX : I64EnumAttrCase<"BlockX", 0, "block_x">; +def BlockY : I64EnumAttrCase<"BlockY", 1, "block_y">; +def BlockZ : I64EnumAttrCase<"BlockZ", 2, "block_z">; +def ThreadX : I64EnumAttrCase<"ThreadX", 3, "thread_x">; +def ThreadY : I64EnumAttrCase<"ThreadY", 4, "thread_y">; +def ThreadZ : I64EnumAttrCase<"ThreadZ", 5, "thread_z">; +def 
Sequential : I64EnumAttrCase<"Sequential", 6, "sequential">; + +def ProcessorEnum : I64EnumAttr<"Processor", "processor for loop mapping", [ BlockX, BlockY, BlockZ, ThreadX, ThreadY, ThreadZ, Sequential]> { let cppNamespace = "::mlir::gpu"; } @@ -37,12 +37,15 @@ // substitution. // bound : An affine map that is used to compute the bound of the hardware // id based on an upper bound of the number of iterations. -def ParallelLoopDimMappingAttr : - StructAttr<"ParallelLoopDimMapping", GPU_Dialect, - [StructFieldAttr<"processor", ProcessorAttr>, - StructFieldAttr<"map", AffineMapAttr>, - StructFieldAttr<"bound", AffineMapAttr>]>; - +def ParallelLoopDimMappingAttr + : GPU_Attr<"ParallelLoopDimMapping", "loop_dim_map"> { + let parameters = (ins + EnumParameter:$processor, + "AffineMap":$map, + "AffineMap":$bound + ); + let assemblyFormat = "`<` struct(params) `>`"; +} def ParallelLoopMappingAttr : TypedArrayAttrBase OpT cloneWithoutRegions(OpT op) { + template + OpT cloneWithoutRegions(OpT op) { return cast(cloneWithoutRegions(*op.getOperation())); } diff --git a/mlir/include/mlir/IR/DialectImplementation.h b/mlir/include/mlir/IR/DialectImplementation.h --- a/mlir/include/mlir/IR/DialectImplementation.h +++ b/mlir/include/mlir/IR/DialectImplementation.h @@ -127,6 +127,17 @@ } }; +/// Parse an affine map. 
+template <> +struct FieldParser<AffineMap> { + static FailureOr<AffineMap> parse(AsmParser &parser) { + AffineMap map; + if (failed(parser.parseAffineMap(map))) + return failure(); + return map; + } +}; + } // namespace mlir #endif // MLIR_IR_DIALECTIMPLEMENTATION_H diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp @@ -429,12 +429,13 @@ Attribute mappingAttribute; Value iv, lowerBound, upperBound, step; std::tie(mappingAttribute, iv, lowerBound, upperBound, step) = config; - auto annotation = mappingAttribute.dyn_cast<gpu::ParallelLoopDimMapping>(); + auto annotation = + mappingAttribute.dyn_cast<gpu::ParallelLoopDimMappingAttr>(); if (!annotation) return parallelOp.emitOpError() << "expected mapping attribute for lowering to GPU"; Value newIndex; - gpu::Processor processor = gpu::getProcessor(annotation); + gpu::Processor processor = annotation.getProcessor(); if (isMappedToProcessor(processor)) { // Use the corresponding thread/grid index as replacement for the loop iv. @@ -449,11 +450,11 @@ rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) + rewriter.getAffineSymbolExpr(1)); newIndex = rewriter.create<AffineApplyOp>( - loc, annotation.map().getValue().compose(lowerAndStep), + loc, annotation.getMap().compose(lowerAndStep), ValueRange{operand, step, lowerBound}); // If there was also a bound, insert that, too. // TODO: Check that we do not assign bounds twice. - if (annotation.bound().getValue()) { + if (annotation.getBound()) { // We pass as the single operand to the bound-map the number of // iterations, which is (upperBound - lowerBound) ceilDiv step. To // support inner loops with dynamic upper bounds (as generated by e.g. 
@@ -493,7 +494,7 @@ ((rewriter.getAffineDimExpr(0) - rewriter.getAffineSymbolExpr(0)) .ceilDiv(rewriter.getAffineSymbolExpr(1)))); Value launchBound = rewriter.create<AffineApplyOp>( - loc, annotation.bound().getValue().compose(stepMap), + loc, annotation.getBound().compose(stepMap), ValueRange{ ensureLaunchIndependent( cloningMap.lookupOrDefault(upperBound)), diff --git a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp --- a/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/ParallelLoopMapper.cpp @@ -18,10 +18,6 @@ #include "mlir/Dialect/GPU/Passes.h" #include "mlir/Dialect/SCF/SCF.h" #include "mlir/IR/AffineMap.h" -#include "mlir/Pass/Pass.h" - -#include "mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc" -#include "mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc" namespace mlir { @@ -29,22 +25,13 @@ StringRef gpu::getMappingAttrName() { return "mapping"; } -gpu::ParallelLoopDimMapping -gpu::getParallelLoopDimMappingAttr(Processor processor, AffineMap map, - AffineMap bound) { - MLIRContext *context = map.getContext(); - OpBuilder builder(context); - return ParallelLoopDimMapping::get( - ProcessorAttr::get(builder.getContext(), processor), - AffineMapAttr::get(map), AffineMapAttr::get(bound), context); -} - -LogicalResult gpu::setMappingAttr(ParallelOp ploopOp, - ArrayRef<ParallelLoopDimMapping> mapping) { +LogicalResult +gpu::setMappingAttr(ParallelOp ploopOp, + ArrayRef<ParallelLoopDimMappingAttr> mapping) { // Verify that each processor is mapped to only once. 
llvm::DenseSet specifiedMappings; for (auto dimAttr : mapping) { - gpu::Processor processor = getProcessor(dimAttr); + gpu::Processor processor = dimAttr.getProcessor(); if (processor != gpu::Processor::Sequential && specifiedMappings.count(processor)) return ploopOp.emitError( @@ -123,10 +110,10 @@ MLIRContext *ctx = parallelOp.getContext(); Builder b(ctx); - SmallVector attrs; + SmallVector attrs; attrs.reserve(parallelOp.getNumLoops()); for (int i = 0, e = parallelOp.getNumLoops(); i < e; ++i) { - attrs.push_back(getParallelLoopDimMappingAttr( + attrs.push_back(b.getAttr( getHardwareIdForMapping(mappingLevel, i), b.getDimIdentityMap(), b.getDimIdentityMap())); } diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir --- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -11,7 +11,7 @@ step (%arg4, %step) { %val = memref.load %buf[%i0, %i1] : memref memref.store %val, %res[%i1, %i0] : memref - } { mapping = [{processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, {processor = 0, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}] } + } { mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>] } return } @@ -56,12 +56,12 @@ %val = memref.load %buf[%idx0, %idx1] : memref memref.store %val, %res[%idx1, %idx0] : memref } { mapping = [ - {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 3, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } } { mapping = [ - {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 0, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = 
(d0) -> (d0)> ] } return } @@ -109,8 +109,8 @@ %val = memref.load %buf[%i0, %i1] : memref memref.store %val, %res[%i1, %i0] : memref } { mapping = [ - {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } return } @@ -156,12 +156,12 @@ %val = memref.load %buf[%idx0, %idx1] : memref memref.store %val, %res[%idx1, %idx0] : memref } { mapping = [ - {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } } { mapping = [ - {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } return } @@ -234,9 +234,9 @@ %20 = arith.addf %17, %18 : f32 memref.store %20, %16[%arg5, %arg6] : memref scf.yield - } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 3 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 4 : i64}]} + } {mapping = [#gpu.loop_dim_map (d0), map = (d0) -> (d0), processor = thread_x>, #gpu.loop_dim_map (d0), map = (d0) -> (d0), processor = thread_y>]} scf.yield - } {mapping = [{bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 0 : i64}, {bound = affine_map<(d0) -> (d0)>, map = affine_map<(d0) -> (d0)>, processor = 1 : i64}]} + } {mapping = [#gpu.loop_dim_map (d0), map = (d0) -> (d0), processor = block_x>, #gpu.loop_dim_map (d0), map = (d0) -> (d0), processor = block_y>]} return } } @@ -310,7 +310,7 @@ %c0 = arith.constant 0 : index %c1 = 
arith.constant 1 : index scf.parallel (%i0) = (%c0) to (%c1) step (%c1) { - } { mapping = [{processor = 0, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}], optional_attr = 1 } + } { mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>], optional_attr = 1 } // CHECK: optional_attr = 1 return } @@ -327,8 +327,8 @@ scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3) step (%four, %four) { } { mapping = [ - {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } return } @@ -356,12 +356,12 @@ %val = memref.load %buf[%idx0, %idx1] : memref memref.store %val, %res[%idx1, %idx0] : memref } { mapping = [ - {processor = 4, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } } { mapping = [ - {processor = 1, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>}, - {processor = 6, map = affine_map<(d0) -> (d0)>, bound = affine_map<(d0) -> (d0)>} + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map (d0), bound = (d0) -> (d0)> ] } return } diff --git a/mlir/test/Dialect/GPU/mapping.mlir b/mlir/test/Dialect/GPU/mapping.mlir --- a/mlir/test/Dialect/GPU/mapping.mlir +++ b/mlir/test/Dialect/GPU/mapping.mlir @@ -14,14 +14,13 @@ return } -// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @parallel_loop( // CHECK: scf.parallel // CHECK: scf.parallel -// CHECK: {mapping = [{bound = #[[$MAP]], map = #[[$MAP]], processor = 3 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 4 : i64}]} -// CHECK: {mapping = [{bound = #[[$MAP]], map = #[[$MAP]], processor = 0 : i64}, -// CHECK-SAME: 
{bound = #[[$MAP]], map = #[[$MAP]], processor = 1 : i64}]} +// CHECK: {mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>]} +// CHECK: {mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>]} // CHECK-NOT: mapping // ----- @@ -43,21 +42,20 @@ return } -// CHECK: #[[$MAP:.*]] = affine_map<(d0) -> (d0)> // CHECK-LABEL: func @parallel_loop_4d( // CHECK: scf.parallel // CHECK: scf.parallel // CHECK: scf.parallel -// CHECK: {mapping = [{bound = #[[$MAP]], map = #[[$MAP]], processor = 6 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 6 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 6 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 6 : i64}]} -// CHECK: {mapping = [{bound = #[[$MAP]], map = #[[$MAP]], processor = 3 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 4 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 5 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 6 : i64}]} -// CHECK: {mapping = [{bound = #[[$MAP]], map = #[[$MAP]], processor = 0 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 1 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 2 : i64}, -// CHECK-SAME: {bound = #[[$MAP]], map = #[[$MAP]], processor = 6 : i64}]} +// CHECK: {mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>]} +// CHECK: {mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>]} +// 
CHECK: {mapping = [#gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>, +// CHECK-SAME: #gpu.loop_dim_map (d0), bound = (d0) -> (d0)>]} // CHECK-NOT: mapping diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3453,6 +3453,7 @@ srcs = [ "include/mlir/Dialect/GPU/GPUBase.td", "include/mlir/Dialect/GPU/GPUOps.td", + "include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td", ], includes = ["include"], deps = [ @@ -3465,35 +3466,6 @@ ], ) -gentbl_cc_library( - name = "ParallelLoopMapperAttrGen", - strip_include_prefix = "include", - tbl_outs = [ - ( - ["-gen-struct-attr-decls"], - "include/mlir/Dialect/GPU/ParallelLoopMapperAttr.h.inc", - ), - ( - ["-gen-struct-attr-defs"], - "include/mlir/Dialect/GPU/ParallelLoopMapperAttr.cpp.inc", - ), - ( - ["-gen-enum-decls"], - "include/mlir/Dialect/GPU/ParallelLoopMapperEnums.h.inc", - ), - ( - ["-gen-enum-defs"], - "include/mlir/Dialect/GPU/ParallelLoopMapperEnums.cpp.inc", - ), - ], - tblgen = ":mlir-tblgen", - td_file = "include/mlir/Dialect/GPU/ParallelLoopMapperAttr.td", - deps = [ - ":AttrTdFiles", - ":GPUOpsTdFiles", - ], -) - gentbl_cc_library( name = "GPUBaseIncGen", strip_include_prefix = "include", @@ -3571,7 +3543,9 @@ "lib/Dialect/GPU/IR/*.h", ], ), - hdrs = ["include/mlir/Dialect/GPU/GPUDialect.h"], + hdrs = [ + "include/mlir/Dialect/GPU/GPUDialect.h", + ], includes = ["include"], deps = [ ":ArithmeticDialect", @@ -3644,7 +3618,6 @@ ":GPUPassIncGen", ":MemRefDialect", ":IR", - ":ParallelLoopMapperAttrGen", ":Parser", ":Pass", ":ROCDLToLLVMIRTranslation", @@ -5068,7 +5041,6 @@ ":FuncDialect", ":IR", ":MemRefDialect", - ":ParallelLoopMapperAttrGen", ":Pass", ":SCFDialect", ":TensorDialect",