diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -188,55 +188,50 @@ // Create and insert the operation. auto parallelOp = firOpBuilder.create( currentLocation, argTy, ifClauseOperand, numThreadsClauseOperand, - defaultClauseOperand.dyn_cast_or_null(), + defaultClauseOperand.dyn_cast_or_null(), privateClauseOperands, firstprivateClauseOperands, sharedClauseOperands, copyinClauseOperands, ValueRange(), ValueRange(), - procBindClauseOperand.dyn_cast_or_null()); + procBindClauseOperand.dyn_cast_or_null()); // Handle attribute based clauses. for (const auto &clause : parallelOpClauseList.v) { if (const auto &defaultClause = std::get_if(&clause.u)) { const auto &ompDefaultClause{defaultClause->v}; + omp::ClauseDefault clause; switch (ompDefaultClause.v) { case Fortran::parser::OmpDefaultClause::Type::Private: - parallelOp.default_valAttr(firOpBuilder.getStringAttr( - omp::stringifyClauseDefault(omp::ClauseDefault::defprivate))); + clause = omp::ClauseDefault::defprivate; break; case Fortran::parser::OmpDefaultClause::Type::Firstprivate: - parallelOp.default_valAttr( - firOpBuilder.getStringAttr(omp::stringifyClauseDefault( - omp::ClauseDefault::deffirstprivate))); + clause = omp::ClauseDefault::deffirstprivate; break; case Fortran::parser::OmpDefaultClause::Type::Shared: - parallelOp.default_valAttr(firOpBuilder.getStringAttr( - omp::stringifyClauseDefault(omp::ClauseDefault::defshared))); + clause = omp::ClauseDefault::defshared; break; case Fortran::parser::OmpDefaultClause::Type::None: - parallelOp.default_valAttr(firOpBuilder.getStringAttr( - omp::stringifyClauseDefault(omp::ClauseDefault::defnone))); + clause = omp::ClauseDefault::defnone; break; } + parallelOp.default_valAttr( + omp::ClauseDefaultAttr::get(firOpBuilder.getContext(), clause)); } if (const auto &procBindClause = std::get_if(&clause.u)) { const auto &ompProcBindClause{procBindClause->v}; + omp::ClauseProcBindKind pbKind; switch (ompProcBindClause.v) { case Fortran::parser::OmpProcBindClause::Type::Master: - parallelOp.proc_bind_valAttr( - firOpBuilder.getStringAttr(omp::stringifyClauseProcBindKind( - omp::ClauseProcBindKind::master))); + pbKind = omp::ClauseProcBindKind::master; break; case Fortran::parser::OmpProcBindClause::Type::Close: - parallelOp.proc_bind_valAttr( - firOpBuilder.getStringAttr(omp::stringifyClauseProcBindKind( - omp::ClauseProcBindKind::close))); + pbKind = omp::ClauseProcBindKind::close; break; case Fortran::parser::OmpProcBindClause::Type::Spread: - parallelOp.proc_bind_valAttr( - firOpBuilder.getStringAttr(omp::stringifyClauseProcBindKind( - omp::ClauseProcBindKind::spread))); + pbKind = omp::ClauseProcBindKind::spread; break; } + parallelOp.proc_bind_valAttr(omp::ClauseProcBindKindAttr::get( + firOpBuilder.getContext(), pbKind)); } } createBodyOfOp(parallelOp, firOpBuilder, currentLocation); diff --git a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt --- a/mlir/include/mlir/Dialect/GPU/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/GPU/CMakeLists.txt @@ -27,4 +27,9 @@ mlir_tablegen(GPUOpsEnums.cpp.inc -gen-enum-defs) add_public_tablegen_target(MLIRGPUOpsEnumsGen) +set(LLVM_TARGET_DEFINITIONS GPUOps.td) +mlir_tablegen(GPUOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=gpu) +mlir_tablegen(GPUOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=gpu) +add_public_tablegen_target(MLIRGPUOpsAttributesIncGen) + add_mlir_doc(Passes GPUPasses ./ -gen-pass-doc) diff 
--git a/mlir/include/mlir/Dialect/GPU/GPUBase.td b/mlir/include/mlir/Dialect/GPU/GPUBase.td --- a/mlir/include/mlir/Dialect/GPU/GPUBase.td +++ b/mlir/include/mlir/Dialect/GPU/GPUBase.td @@ -53,6 +53,7 @@ }]; let dependentDialects = ["arith::ArithmeticDialect"]; + let useDefaultAttributePrinterParser = 1; } def GPU_AsyncToken : DialectType< diff --git a/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/mlir/include/mlir/Dialect/GPU/GPUDialect.h --- a/mlir/include/mlir/Dialect/GPU/GPUDialect.h +++ b/mlir/include/mlir/Dialect/GPU/GPUDialect.h @@ -173,6 +173,9 @@ #include "mlir/Dialect/GPU/GPUOpInterfaces.h.inc" +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/GPU/GPUOpsAttributes.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.h.inc" diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td --- a/mlir/include/mlir/Dialect/GPU/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td @@ -16,6 +16,7 @@ include "mlir/Dialect/DLTI/DLTIBase.td" include "mlir/Dialect/GPU/GPUBase.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" +include "mlir/IR/EnumAttr.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/Interfaces/DataLayoutInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" @@ -28,10 +29,22 @@ class GPU_Op traits = []> : Op; +def GPU_Dimension : I32EnumAttr<"Dimension", + "a dimension, either 'x', 'y', or 'z'", + [ + I32EnumAttrCase<"x", 0>, + I32EnumAttrCase<"y", 1>, + I32EnumAttrCase<"z", 2> + ]>{ + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::gpu"; +} +def GPU_DimensionAttr : EnumAttr; + class GPU_IndexOp traits = []> : GPU_Op, - Arguments<(ins StrAttr:$dimension)>, Results<(outs Index)> { - let verifier = [{ return ::verifyIndexOp(*this); }]; + Arguments<(ins GPU_DimensionAttr:$dimension)>, Results<(outs Index)> { + let assemblyFormat = "$dimension attr-dict"; } def GPU_BlockDimOp : GPU_IndexOp<"block_dim"> { @@ -42,7 +55,7 @@ Example: ```mlir - %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index) + %bDimX = gpu.block_dim x ``` }]; } @@ -54,7 +67,7 @@ Example: ```mlir - %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index) + %bIdY = gpu.block_id y ``` }]; } @@ -66,7 +79,7 @@ Example: ```mlir - %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %gDimZ = gpu.grid_dim z ``` }]; } @@ -78,7 +91,7 @@ Example: ```mlir - %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) + %tIdX = gpu.thread_id x ``` }]; } @@ -339,21 +352,21 @@ // Operations that produce block/thread IDs and dimensions are // injected when outlining the `gpu.launch` body to a function called // by `gpu.launch_func`. 
- %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) - %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index) - %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index) + %tIdX = gpu.thread_id x + %tIdY = gpu.thread_id y + %tIdZ = gpu.thread_id z - %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index) - %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index) - %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index) + %bDimX = gpu.block_dim x + %bDimY = gpu.block_dim y + %bDimZ = gpu.block_dim z - %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index) - %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index) - %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index) + %bIdX = gpu.block_id x + %bIdY = gpu.block_id y + %bIdZ = gpu.block_id z - %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index) - %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) - %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %gDimX = gpu.grid_dim x + %gDimY = gpu.grid_dim y + %gDimZ = gpu.grid_dim z "some_op"(%bx, %tx) : (index, index) -> () %42 = load %arg1[%bx] : memref @@ -608,15 +621,15 @@ } // add, mul mirror the XLA ComparisonDirection enum. -def GPU_AllReduceOpAdd : StrEnumAttrCase<"ADD", -1, "add">; -def GPU_AllReduceOpAnd : StrEnumAttrCase<"AND", -1, "and">; -def GPU_AllReduceOpMax : StrEnumAttrCase<"MAX", -1, "max">; -def GPU_AllReduceOpMin : StrEnumAttrCase<"MIN", -1, "min">; -def GPU_AllReduceOpMul : StrEnumAttrCase<"MUL", -1, "mul">; -def GPU_AllReduceOpOr : StrEnumAttrCase<"OR", -1, "or">; -def GPU_AllReduceOpXor : StrEnumAttrCase<"XOR", -1, "xor">; - -def GPU_AllReduceOperationAttr : StrEnumAttr<"AllReduceOperationAttr", +def GPU_AllReduceOpAdd : I32EnumAttrCase<"ADD", 0, "add">; +def GPU_AllReduceOpAnd : I32EnumAttrCase<"AND", 1, "and">; +def GPU_AllReduceOpMax : I32EnumAttrCase<"MAX", 2, "max">; +def GPU_AllReduceOpMin : I32EnumAttrCase<"MIN", 3, "min">; +def GPU_AllReduceOpMul : I32EnumAttrCase<"MUL", 4, "mul">; +def GPU_AllReduceOpOr : I32EnumAttrCase<"OR", 5, "or">; +def GPU_AllReduceOpXor : I32EnumAttrCase<"XOR", 6, "xor">; + +def GPU_AllReduceOperation : I32EnumAttr<"AllReduceOperation", "built-in reduction operations supported by gpu.allreduce.", [ GPU_AllReduceOpAdd, @@ -627,8 +640,11 @@ GPU_AllReduceOpOr, GPU_AllReduceOpXor ]>{ + let genSpecializedAttr = 0; let cppNamespace = "::mlir::gpu"; } +def GPU_AllReduceOperationAttr : EnumAttr; def GPU_AllReduceOp : GPU_Op<"all_reduce", [SameOperandsAndResultType, IsolatedFromAbove]>, @@ -643,12 +659,12 @@ For example, both ```mlir - %1 = "gpu.all_reduce"(%0) ({}) { op = "add" } : (f32) -> (f32) - %2 = "gpu.all_reduce"(%0) ({ + %1 = gpu.all_reduce add %0 {} : (f32) -> (f32) + %2 = gpu.all_reduce %0 { ^bb(%lhs : f32, %rhs : f32): %sum = arith.addf %lhs, %rhs : f32 "gpu.yield"(%sum) : (f32) -> () - }) : (f32) -> (f32) + } : (f32) -> (f32) ``` compute the sum of each work item's %0 value. 
The first version specifies @@ -661,31 +677,34 @@ }]; let regions = (region AnyRegion:$body); let verifier = [{ return ::verifyAllReduce(*this); }]; + let assemblyFormat = [{ custom<AllReduceOperation>($op) $value $body attr-dict + `:` functional-type(operands, results) }]; } -def GPU_ShuffleOpXor : StrEnumAttrCase<"XOR", -1, "xor">; -def GPU_ShuffleOpDown : StrEnumAttrCase<"DOWN", -1, "down">; -def GPU_ShuffleOpUp : StrEnumAttrCase<"UP", -1, "up">; -def GPU_ShuffleOpIdx : StrEnumAttrCase<"IDX", -1, "idx">; +def GPU_ShuffleOpXor : I32EnumAttrCase<"XOR", 0, "xor">; +def GPU_ShuffleOpDown : I32EnumAttrCase<"DOWN", 1, "down">; +def GPU_ShuffleOpUp : I32EnumAttrCase<"UP", 2, "up">; +def GPU_ShuffleOpIdx : I32EnumAttrCase<"IDX", 3, "idx">; -def GPU_ShuffleModeAttr : StrEnumAttr<"ShuffleModeAttr", +def GPU_ShuffleMode : I32EnumAttr<"ShuffleMode", "Indexing modes supported by gpu.shuffle.", [ GPU_ShuffleOpXor, GPU_ShuffleOpUp, GPU_ShuffleOpDown, GPU_ShuffleOpIdx, - ]>{ + ]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::gpu"; - let storageType = "mlir::StringAttr"; - let returnType = "::mlir::gpu::ShuffleModeAttr"; - let convertFromStorage = - "*symbolizeEnum<::mlir::gpu::ShuffleModeAttr>($_self.getValue())"; - let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; } +def GPU_ShuffleModeAttr : EnumAttr<GPU_Dialect, GPU_ShuffleMode, "shuffle_mode">; +def I32OrF32 : TypeConstraint<Or<[I32.predicate, F32.predicate]>, + "i32 or f32">; -def GPU_ShuffleOp : GPU_Op<"shuffle", [NoSideEffect]>, - Arguments<(ins AnyType:$value, I32:$offset, I32:$width, +def GPU_ShuffleOp : GPU_Op< + "shuffle", [NoSideEffect, AllTypesMatch<["value", "result"]>]>, + Arguments<(ins I32OrF32:$value, I32:$offset, I32:$width, GPU_ShuffleModeAttr:$mode)>, - Results<(outs AnyType:$result, I1:$valid)> { + Results<(outs I32OrF32:$result, I1:$valid)> { let summary = "Shuffles values within a subgroup."; let description = [{ The "shuffle" op moves values to a different invocation within the same @@ -706,9 +725,7 @@ in the shuffle. Exactly the first `width` invocations of a subgroup need to execute this op in convergence. 
}]; - let verifier = [{ return ::verifyShuffleOp(*this); }]; - let printer = [{ printShuffleOp(p, *this); }]; - let parser = [{ return parseShuffleOp(parser, result); }]; + let assemblyFormat = "$mode $value `,` $offset `,` $width attr-dict `:` type($value)"; } def GPU_BarrierOp : GPU_Op<"barrier"> { @@ -1151,22 +1168,25 @@ }]; } -def GPU_ELEMENTWISE_OP_ADD : StrEnumAttrCase<"ADDF">; -def GPU_ELEMENTWISE_OP_MUL : StrEnumAttrCase<"MULF">; -def GPU_ELEMENTWISE_OP_MAXF : StrEnumAttrCase<"MAXF">; -def GPU_ELEMENTWISE_OP_MINF : StrEnumAttrCase<"MINF">; -def GPU_ELEMENTWISE_OP_DIVF : StrEnumAttrCase<"DIVF">; - -def MMAElementWiseAttr : StrEnumAttr<"MMAElementwiseOp", - "elementwise operation to apply to mma matrix", - [GPU_ELEMENTWISE_OP_ADD, GPU_ELEMENTWISE_OP_MUL, - GPU_ELEMENTWISE_OP_MAXF, GPU_ELEMENTWISE_OP_MINF, GPU_ELEMENTWISE_OP_DIVF]> { +def GPU_ElementwiseOpAdd : I32EnumAttrCase<"ADDF", 0, "addf">; +def GPU_ElementwiseOpMul : I32EnumAttrCase<"MULF", 1, "mulf">; +def GPU_ElementwiseOpMaxF : I32EnumAttrCase<"MAXF", 2, "maxf">; +def GPU_ElementwiseOpMinF : I32EnumAttrCase<"MINF", 3, "minf">; +def GPU_ElementwiseOpDivF : I32EnumAttrCase<"DIVF", 4, "divf">; + +def MMAElementWise : I32EnumAttr<"MMAElementwiseOp", + "elementwise operation to apply to mma matrix", [ + GPU_ElementwiseOpAdd, + GPU_ElementwiseOpMul, + GPU_ElementwiseOpMaxF, + GPU_ElementwiseOpMinF, + GPU_ElementwiseOpDivF + ]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::gpu"; - let storageType = "::mlir::StringAttr"; - let returnType = "::mlir::gpu::MMAElementwiseOp"; - let convertFromStorage = "*symbolizeMMAElementwiseOp($_self.getValue())"; - let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; } +def MMAElementWiseAttr : EnumAttr; def GPU_SubgroupMmaElementwiseOp : GPU_Op<"subgroup_mma_elementwise", [NoSideEffect, @@ -1193,7 +1213,8 @@ ``` }]; - let arguments = (ins Variadic:$args, MMAElementWiseAttr:$operation); + let arguments = (ins Variadic:$args, + MMAElementWiseAttr:$operation); let results = (outs GPU_MMAMatrix:$res); @@ -1204,7 +1225,7 @@ }]; let assemblyFormat = [{ - $args attr-dict `:` functional-type($args, $res) + $operation $args attr-dict `:` functional-type($args, $res) }]; } diff --git a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt --- a/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/LLVMIR/CMakeLists.txt @@ -35,6 +35,8 @@ mlir_tablegen(NVVMConversions.inc -gen-llvmir-conversions) mlir_tablegen(NVVMOpsEnums.h.inc -gen-enum-decls) mlir_tablegen(NVVMOpsEnums.cpp.inc -gen-enum-defs) +mlir_tablegen(NVVMOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=nvvm) +mlir_tablegen(NVVMOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=nvvm) add_public_tablegen_target(MLIRNVVMConversionsIncGen) add_mlir_dialect(ROCDLOps rocdl) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMDialect.h @@ -30,6 +30,9 @@ mlir::MLIRContext *context); ///// Ops ///// +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/LLVMIR/NVVMOpsAttributes.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/NVVMOps.h.inc" diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -13,6 +13,7 @@ #ifndef NVVMIR_OPS #define 
NVVMIR_OPS +include "mlir/IR/EnumAttr.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" @@ -34,6 +35,8 @@ /// functions. static StringRef getKernelFuncAttrName() { return "nvvm.kernel"; } }]; + + let useDefaultAttributePrinterParser = 1; } //===----------------------------------------------------------------------===// @@ -100,20 +103,18 @@ let assemblyFormat = "attr-dict"; } -def ShflKindBfly : StrEnumAttrCase<"bfly">; -def ShflKindUp : StrEnumAttrCase<"up">; -def ShflKindDown : StrEnumAttrCase<"down">; -def ShflKindIdx : StrEnumAttrCase<"idx">; +def ShflKindBfly : I32EnumAttrCase<"bfly", 0>; +def ShflKindUp : I32EnumAttrCase<"up", 1>; +def ShflKindDown : I32EnumAttrCase<"down", 2>; +def ShflKindIdx : I32EnumAttrCase<"idx", 3>; /// Enum attribute of the different shuffle kinds. -def ShflKind : StrEnumAttr<"ShflKind", "NVVM shuffle kind", +def ShflKind : I32EnumAttr<"ShflKind", "NVVM shuffle kind", [ShflKindBfly, ShflKindUp, ShflKindDown, ShflKindIdx]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::NVVM"; - let storageType = "mlir::StringAttr"; - let returnType = "NVVM::ShflKind"; - let convertFromStorage = "*symbolizeEnum($_self.getValue())"; - let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; } +def ShflKindAttr : EnumAttr; def NVVM_ShflOp : NVVM_Op<"shfl.sync">, @@ -122,7 +123,7 @@ LLVM_Type:$val, I32:$offset, I32:$mask_and_clamp, - ShflKind:$kind, + ShflKindAttr:$kind, OptionalAttr:$return_value_and_is_valid)> { string llvmBuilder = [{ auto intId = getShflIntrinsicId( @@ -420,52 +421,52 @@ string id = !foldl("", f, acc, el, acc # "\n" # el); } -def MMALayoutRow : StrEnumAttrCase<"row">; -def MMALayoutCol : StrEnumAttrCase<"col">; +def MMALayoutRow : I32EnumAttrCase<"row", 0>; +def MMALayoutCol : I32EnumAttrCase<"col", 1>; /// Enum attribute of the different matrix layout. -def MMALayout : StrEnumAttr<"MMALayout", "NVVM MMA layout", +def MMALayout : I32EnumAttr<"MMALayout", "NVVM MMA layout", [MMALayoutRow, MMALayoutCol]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::NVVM"; - let storageType = "mlir::StringAttr"; - let returnType = "NVVM::MMALayout"; - let convertFromStorage = "*symbolizeEnum($_self.getValue())"; - let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; +} +def MMALayoutAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; } -def MMATypeF16 : StrEnumAttrCase<"f16">; -def MMATypeF32 : StrEnumAttrCase<"f32">; -def MMATypeTF32 : StrEnumAttrCase<"tf32">; +def MMATypeF16 : I32EnumAttrCase<"f16", 0>; +def MMATypeF32 : I32EnumAttrCase<"f32", 1>; +def MMATypeTF32 : I32EnumAttrCase<"tf32", 2>; /// Enum attribute of the different matrix types. -def MMATypes : StrEnumAttr<"MMATypes", "NVVM MMA types", +def MMATypes : I32EnumAttr<"MMATypes", "NVVM MMA types", [MMATypeF16, MMATypeF32, MMATypeTF32]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::NVVM"; - let storageType = "mlir::StringAttr"; - let returnType = "NVVM::MMATypes"; - let convertFromStorage = "*symbolizeEnum($_self.getValue())"; - let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; +} +def MMATypesAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; } -def MMAFragA : StrEnumAttrCase<"a">; -def MMAFragB : StrEnumAttrCase<"b">; -def MMAFragC : StrEnumAttrCase<"c">; +def MMAFragA : I32EnumAttrCase<"a", 0>; +def MMAFragB : I32EnumAttrCase<"b", 1>; +def MMAFragC : I32EnumAttrCase<"c", 2>; /// Enum attribute of the different frag types. 
-def MMAFragAttr : StrEnumAttr<"MMAFrag", "NVVM MMA frag type", +def MMAFrag: I32EnumAttr<"MMAFrag", "NVVM MMA frag type", [MMAFragA, MMAFragB, MMAFragC]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::NVVM"; - let storageType = "mlir::StringAttr"; - let returnType = "NVVM::MMAFrag"; - let convertFromStorage = "*symbolizeEnum($_self.getValue())"; - let constBuilderCall = "$_builder.getStringAttr(stringifyEnum($0))"; +} +def MMAFragAttr : EnumAttr { + let assemblyFormat = "`<` $value `>`"; } def NVVM_WMMALoadOp: NVVM_Op<"wmma.load">, Results<(outs LLVM_AnyStruct:$res)>, Arguments<(ins LLVM_AnyPointer: $ptr, I32: $stride, I32Attr:$m, - I32Attr:$n, I32Attr:$k, MMALayout:$layout, MMATypes:$eltype, - MMAFragAttr:$frag)> { + I32Attr:$n, I32Attr:$k, MMALayoutAttr:$layout, + MMATypesAttr:$eltype, MMAFragAttr:$frag)> { let summary = "Warp synchronous matrix load"; @@ -542,8 +543,8 @@ def NVVM_WMMAStoreOp : NVVM_Op<"wmma.store">, Arguments<(ins LLVM_AnyPointer: $ptr, - I32Attr:$m, I32Attr:$n, I32Attr:$k, MMALayout:$layout, - MMATypes:$eltype, Variadic:$args, I32: $stride)>{ + I32Attr:$m, I32Attr:$n, I32Attr:$k, MMALayoutAttr:$layout, + MMATypesAttr:$eltype, Variadic:$args, I32: $stride)>{ let summary = "Warp synchronous matrix store"; let extraClassDeclaration = @@ -598,9 +599,9 @@ // Base class for all the variants of WMMA mmaOps that may be defined. def NVVM_WMMAMmaOp : NVVM_Op<"wmma.mma">, Results<(outs LLVM_AnyStruct:$res)>, - Arguments<(ins I32Attr:$m, I32Attr:$n, I32Attr:$k, MMALayout:$layoutA, - MMALayout:$layoutB, MMATypes:$eltypeA, MMATypes:$eltypeB, - Variadic:$args)>{ + Arguments<(ins I32Attr:$m, I32Attr:$n, I32Attr:$k, MMALayoutAttr:$layoutA, + MMALayoutAttr:$layoutB, MMATypesAttr:$eltypeA, + MMATypesAttr:$eltypeB, Variadic:$args)>{ let summary = "Warp synchronous matrix-multiply accumulate using tensor cores."; let extraClassDeclaration = diff --git a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt --- a/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/OpenACC/CMakeLists.txt @@ -1,5 +1,5 @@ set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend/OpenACC/ACC.td) -mlir_tablegen(AccCommon.td --gen-directive-decl) +mlir_tablegen(AccCommon.td --gen-directive-decl --directives-dialect=OpenACC) add_public_tablegen_target(acc_common_td) set(LLVM_TARGET_DEFINITIONS OpenACCOps.td) @@ -9,6 +9,8 @@ mlir_tablegen(OpenACCOps.cpp.inc -gen-op-defs) mlir_tablegen(OpenACCOpsEnums.h.inc -gen-enum-decls) mlir_tablegen(OpenACCOpsEnums.cpp.inc -gen-enum-defs) +mlir_tablegen(OpenACCOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=acc) +mlir_tablegen(OpenACCOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=acc) add_mlir_doc(OpenACCOps OpenACCDialect Dialects/ -gen-dialect-doc) add_public_tablegen_target(MLIROpenACCOpsIncGen) add_dependencies(OpenACCDialectDocGen acc_common_td) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h --- a/mlir/include/mlir/Dialect/OpenACC/OpenACC.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -19,6 +19,9 @@ #include "mlir/Dialect/OpenACC/OpenACCOpsDialect.h.inc" #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.h.inc" +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/OpenACC/OpenACCOpsAttributes.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/OpenACC/OpenACCOps.h.inc" diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td --- 
a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -13,8 +13,8 @@ #ifndef OPENACC_OPS #define OPENACC_OPS +include "mlir/IR/EnumAttr.td" include "mlir/IR/OpBase.td" -include "mlir/Dialect/OpenACC/AccCommon.td" def OpenACC_Dialect : Dialect { let name = "acc"; @@ -25,9 +25,13 @@ This dialect models the construct from the OpenACC 3.1 directive language. }]; + let useDefaultAttributePrinterParser = 1; let cppNamespace = "::mlir::acc"; } +// AccCommon requires definition of OpenACC_Dialect. +include "mlir/Dialect/OpenACC/AccCommon.td" + // Base class for OpenACC dialect ops. class OpenACC_Op traits = []> : Op { @@ -38,19 +42,19 @@ } // Reduction operation enumeration. -def OpenACC_ReductionOpAdd : StrEnumAttrCase<"redop_add">; -def OpenACC_ReductionOpMul : StrEnumAttrCase<"redop_mul">; -def OpenACC_ReductionOpMax : StrEnumAttrCase<"redop_max">; -def OpenACC_ReductionOpMin : StrEnumAttrCase<"redop_min">; -def OpenACC_ReductionOpAnd : StrEnumAttrCase<"redop_and">; -def OpenACC_ReductionOpOr : StrEnumAttrCase<"redop_or">; -def OpenACC_ReductionOpXor : StrEnumAttrCase<"redop_xor">; -def OpenACC_ReductionOpLogEqv : StrEnumAttrCase<"redop_leqv">; -def OpenACC_ReductionOpLogNeqv : StrEnumAttrCase<"redop_lneqv">; -def OpenACC_ReductionOpLogAnd : StrEnumAttrCase<"redop_land">; -def OpenACC_ReductionOpLogOr : StrEnumAttrCase<"redop_lor">; - -def OpenACC_ReductionOpAttr : StrEnumAttr<"ReductionOpAttr", +def OpenACC_ReductionOpAdd : I32EnumAttrCase<"redop_add", 0>; +def OpenACC_ReductionOpMul : I32EnumAttrCase<"redop_mul", 1>; +def OpenACC_ReductionOpMax : I32EnumAttrCase<"redop_max", 2>; +def OpenACC_ReductionOpMin : I32EnumAttrCase<"redop_min", 3>; +def OpenACC_ReductionOpAnd : I32EnumAttrCase<"redop_and", 4>; +def OpenACC_ReductionOpOr : I32EnumAttrCase<"redop_or", 5>; +def OpenACC_ReductionOpXor : I32EnumAttrCase<"redop_xor", 6>; +def OpenACC_ReductionOpLogEqv : I32EnumAttrCase<"redop_leqv", 7>; +def OpenACC_ReductionOpLogNeqv : I32EnumAttrCase<"redop_lneqv", 8>; +def OpenACC_ReductionOpLogAnd : I32EnumAttrCase<"redop_land", 9>; +def OpenACC_ReductionOpLogOr : I32EnumAttrCase<"redop_lor", 10>; + +def OpenACC_ReductionOp : I32EnumAttr<"ReductionOp", "built-in reduction operations supported by OpenACC", [OpenACC_ReductionOpAdd, OpenACC_ReductionOpMul, OpenACC_ReductionOpMax, OpenACC_ReductionOpMin, OpenACC_ReductionOpAnd, OpenACC_ReductionOpOr, @@ -58,8 +62,11 @@ OpenACC_ReductionOpLogNeqv, OpenACC_ReductionOpLogAnd, OpenACC_ReductionOpLogOr ]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::acc"; } +def OpenACC_ReductionOpAttr : EnumAttr; // Type used in operation below. 
def IntOrIndex : AnyTypeOf<[AnyInteger, Index]>; @@ -110,7 +117,7 @@ Variadic:$attachOperands, Variadic:$gangPrivateOperands, Variadic:$gangFirstPrivateOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr); let regions = (region AnyRegion:$region); @@ -188,7 +195,7 @@ Variadic:$presentOperands, Variadic:$deviceptrOperands, Variadic:$attachOperands, - OptionalAttr:$defaultAttr); + OptionalAttr:$defaultAttr); let regions = (region AnyRegion:$region); diff --git a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt --- a/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt +++ b/mlir/include/mlir/Dialect/OpenMP/CMakeLists.txt @@ -1,5 +1,5 @@ set(LLVM_TARGET_DEFINITIONS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Frontend/OpenMP/OMP.td) -mlir_tablegen(OmpCommon.td --gen-directive-decl) +mlir_tablegen(OmpCommon.td --gen-directive-decl --directives-dialect=OpenMP) add_public_tablegen_target(omp_common_td) set(LLVM_TARGET_DEFINITIONS OpenMPOps.td) @@ -11,6 +11,8 @@ mlir_tablegen(OpenMPOpsEnums.cpp.inc -gen-enum-defs) mlir_tablegen(OpenMPTypeInterfaces.h.inc -gen-type-interface-decls) mlir_tablegen(OpenMPTypeInterfaces.cpp.inc -gen-type-interface-defs) +mlir_tablegen(OpenMPOpsAttributes.h.inc -gen-attrdef-decls -attrdefs-dialect=omp) +mlir_tablegen(OpenMPOpsAttributes.cpp.inc -gen-attrdef-defs -attrdefs-dialect=omp) add_mlir_doc(OpenMPOps OpenMPDialect Dialects/ -gen-dialect-doc) add_public_tablegen_target(MLIROpenMPOpsIncGen) add_dependencies(OpenMPDialectDocGen omp_common_td) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPDialect.h @@ -25,6 +25,9 @@ #include "mlir/Dialect/OpenMP/OpenMPOpsInterfaces.h.inc" #include "mlir/Dialect/OpenMP/OpenMPTypeInterfaces.h.inc" +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOps.h.inc" diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -14,11 +14,11 @@ #ifndef OPENMP_OPS #define OPENMP_OPS +include "mlir/IR/EnumAttr.td" include "mlir/IR/OpBase.td" include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/IR/SymbolInterfaces.td" -include "mlir/Dialect/OpenMP/OmpCommon.td" include "mlir/Dialect/LLVMIR/LLVMOpBase.td" include "mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td" @@ -26,8 +26,12 @@ let name = "omp"; let cppNamespace = "::mlir::omp"; let dependentDialects = ["::mlir::LLVM::LLVMDialect"]; + let useDefaultAttributePrinterParser = 1; } +// OmpCommon requires definition of OpenACC_Dialect. 
+include "mlir/Dialect/OpenMP/OmpCommon.td" + class OpenMP_Op traits = []> : Op; @@ -60,18 +64,21 @@ //===----------------------------------------------------------------------===// // Possible values for the default clause -def ClauseDefaultPrivate : StrEnumAttrCase<"defprivate">; -def ClauseDefaultFirstPrivate : StrEnumAttrCase<"deffirstprivate">; -def ClauseDefaultShared : StrEnumAttrCase<"defshared">; -def ClauseDefaultNone : StrEnumAttrCase<"defnone">; +def ClauseDefaultPrivate : I32EnumAttrCase<"defprivate", 0>; +def ClauseDefaultFirstPrivate : I32EnumAttrCase<"deffirstprivate", 1>; +def ClauseDefaultShared : I32EnumAttrCase<"defshared", 2>; +def ClauseDefaultNone : I32EnumAttrCase<"defnone", 3>; -def ClauseDefault : StrEnumAttr< +def ClauseDefault : I32EnumAttr< "ClauseDefault", "default clause", [ClauseDefaultPrivate, ClauseDefaultFirstPrivate, ClauseDefaultShared, ClauseDefaultNone]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::omp"; } +def ClauseDefaultAttr : EnumAttr; def ParallelOp : OpenMP_Op<"parallel", [AttrSizedOperandSegments, DeclareOpInterfaceMethods]> { @@ -105,14 +112,14 @@ let arguments = (ins Optional:$if_expr_var, Optional:$num_threads_var, - OptionalAttr:$default_val, + OptionalAttr:$default_val, Variadic:$private_vars, Variadic:$firstprivate_vars, Variadic:$shared_vars, Variadic:$copyin_vars, Variadic:$allocate_vars, Variadic:$allocators_vars, - OptionalAttr:$proc_bind_val); + OptionalAttr:$proc_bind_val); let regions = (region AnyRegion:$region); @@ -136,19 +143,20 @@ let assemblyFormat = "attr-dict"; } -def OMP_SCHEDULE_MOD_None : StrEnumAttrCase<"none", 0>; -def OMP_SCHEDULE_MOD_Monotonic : StrEnumAttrCase<"monotonic", 1>; -def OMP_SCHEDULE_MOD_Nonmonotonic : StrEnumAttrCase<"nonmonotonic", 2>; -def OMP_SCHEDULE_MOD_SIMD : StrEnumAttrCase<"simd", 3>; - -def ScheduleModifier : StrEnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier", - [OMP_SCHEDULE_MOD_None, - OMP_SCHEDULE_MOD_Monotonic, - OMP_SCHEDULE_MOD_Nonmonotonic, - OMP_SCHEDULE_MOD_SIMD]> -{ - let cppNamespace = "::mlir::omp"; +def OMP_ScheduleModNone : I32EnumAttrCase<"none", 0>; +def OMP_ScheduleModMonotonic : I32EnumAttrCase<"monotonic", 1>; +def OMP_ScheduleModNonmonotonic : I32EnumAttrCase<"nonmonotonic", 2>; +def OMP_ScheduleModSIMD : I32EnumAttrCase<"simd", 3>; + +def ScheduleModifier + : I32EnumAttr<"ScheduleModifier", "OpenMP Schedule Modifier", + [OMP_ScheduleModNone, OMP_ScheduleModMonotonic, + OMP_ScheduleModNonmonotonic, OMP_ScheduleModSIMD]> { + let genSpecializedAttr = 0; + let cppNamespace = "::mlir::omp"; } +def ScheduleModifierAttr : EnumAttr; //===----------------------------------------------------------------------===// // 2.8.1 Sections Construct @@ -288,14 +296,14 @@ Variadic:$linear_step_vars, Variadic:$reduction_vars, OptionalAttr:$reductions, - OptionalAttr:$schedule_val, + OptionalAttr:$schedule_val, Optional:$schedule_chunk_var, - OptionalAttr:$schedule_modifier, + OptionalAttr:$schedule_modifier, UnitAttr:$simd_modifier, Confined, [IntMinValue<0>]>:$collapse_val, UnitAttr:$nowait, Confined, [IntMinValue<0>]>:$ordered_val, - OptionalAttr:$order_val, + OptionalAttr:$order_val, UnitAttr:$inclusive); let skipDefaultBuilders = 1; @@ -487,15 +495,19 @@ // [5.1] 2.19.9 ordered Construct //===----------------------------------------------------------------------===// -def ClauseDependSource : StrEnumAttrCase<"dependsource">; -def ClauseDependSink : StrEnumAttrCase<"dependsink">; +def ClauseDependSource : I32EnumAttrCase<"dependsource", 0>; +def ClauseDependSink : 
I32EnumAttrCase<"dependsink", 1>; -def ClauseDepend : StrEnumAttr< +def ClauseDepend : I32EnumAttr< "ClauseDepend", "depend clause", [ClauseDependSource, ClauseDependSink]> { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::omp"; } +def ClauseDependAttr : EnumAttr { + let assemblyFormat = "`(` $value `)`"; +} def OrderedOp : OpenMP_Op<"ordered"> { let summary = "ordered construct without region"; @@ -516,12 +528,12 @@ clauses exist in one ORDERED directive. }]; - let arguments = (ins OptionalAttr:$depend_type_val, + let arguments = (ins OptionalAttr:$depend_type_val, Confined, [IntMinValue<0>]>:$num_loops_val, Variadic:$depend_vec_vars); let assemblyFormat = [{ - ( `depend_type` `(` $depend_type_val^ `)` )? + ( `depend_type` `` $depend_type_val^ )? ( `depend_vec` `(` $depend_vec_vars^ `:` type($depend_vec_vars) `)` )? attr-dict }]; @@ -596,7 +608,7 @@ let arguments = (ins OpenMP_PointerLikeType:$x, OpenMP_PointerLikeType:$v, DefaultValuedAttr:$hint, - OptionalAttr:$memory_order); + OptionalAttr:$memory_order); let parser = [{ return parseAtomicReadOp(parser, result); }]; let printer = [{ return printAtomicReadOp(p, *this); }]; let verifier = [{ return verifyAtomicReadOp(*this); }]; @@ -625,7 +637,7 @@ let arguments = (ins OpenMP_PointerLikeType:$address, AnyType:$value, DefaultValuedAttr:$hint, - OptionalAttr:$memory_order); + OptionalAttr:$memory_order); let parser = [{ return parseAtomicWriteOp(parser, result); }]; let printer = [{ return printAtomicWriteOp(p, *this); }]; let verifier = [{ return verifyAtomicWriteOp(*this); }]; @@ -690,7 +702,7 @@ UnitAttr:$isXBinopExpr, AtomicBinOpKindAttr:$binop, DefaultValuedAttr:$hint, - OptionalAttr:$memory_order); + OptionalAttr:$memory_order); let parser = [{ return parseAtomicUpdateOp(parser, result); }]; let printer = [{ return printAtomicUpdateOp(p, *this); }]; let verifier = [{ return verifyAtomicUpdateOp(*this); }]; diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.h b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.h --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.h +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVAttributes.h @@ -23,9 +23,9 @@ namespace mlir { namespace spirv { enum class Capability : uint32_t; -enum class DeviceType; -enum class Extension; -enum class Vendor; +enum class DeviceType : uint32_t; +enum class Extension : uint32_t; +enum class Vendor : uint32_t; enum class Version : uint32_t; namespace detail { diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td @@ -15,6 +15,7 @@ #ifndef MLIR_DIALECT_SPIRV_IR_BASE #define MLIR_DIALECT_SPIRV_IR_BASE +include "mlir/IR/EnumAttr.td" include "mlir/IR/OpBase.td" include "mlir/Dialect/SPIRV/IR/SPIRVAvailability.td" @@ -97,17 +98,15 @@ let cppNamespace = "::mlir::spirv"; } -// Wrapper over base StrEnumAttr to set common fields. -class SPV_StrEnumAttr cases> : - StrEnumAttr { - let predicate = And<[ - StrAttr.predicate, - CPred<"::mlir::spirv::symbolize" # name # "(" - "$_self.cast().getValue()).hasValue()">, - ]>; +// Wrapper over base I32EnumAttr to set common fields. 
+class SPV_Enum cases> + : I32EnumAttr { + let genSpecializedAttr = 0; let cppNamespace = "::mlir::spirv"; } +class SPV_EnumAttr cases> : + EnumAttr, mnemonic>; //===----------------------------------------------------------------------===// // SPIR-V availability definitions @@ -147,7 +146,7 @@ }]; } -class Extension extensions> : Availability { +class Extension extensions> : Availability { let cppNamespace = "::mlir::spirv"; let interfaceName = "QueryExtensionInterface"; let interfaceDescription = [{ @@ -263,31 +262,31 @@ // SPIR-V target GPU vendor and device definitions //===----------------------------------------------------------------------===// -def SPV_DT_CPU : StrEnumAttrCase<"CPU">; -def SPV_DT_DiscreteGPU : StrEnumAttrCase<"DiscreteGPU">; -def SPV_DT_IntegratedGPU : StrEnumAttrCase<"IntegratedGPU">; +def SPV_DT_CPU : I32EnumAttrCase<"CPU", 0>; +def SPV_DT_DiscreteGPU : I32EnumAttrCase<"DiscreteGPU", 1>; +def SPV_DT_IntegratedGPU : I32EnumAttrCase<"IntegratedGPU", 2>; // An accelerator other than GPU or CPU -def SPV_DT_Other : StrEnumAttrCase<"Other">; +def SPV_DT_Other : I32EnumAttrCase<"Other", 3>; // Information missing. -def SPV_DT_Unknown : StrEnumAttrCase<"Unknown">; +def SPV_DT_Unknown : I32EnumAttrCase<"Unknown", 4>; -def SPV_DeviceTypeAttr : SPV_StrEnumAttr< - "DeviceType", "valid SPIR-V device types", [ +def SPV_DeviceTypeAttr : SPV_EnumAttr< + "DeviceType", "valid SPIR-V device types", "device_type", [ SPV_DT_Other, SPV_DT_IntegratedGPU, SPV_DT_DiscreteGPU, SPV_DT_CPU, SPV_DT_Unknown ]>; -def SPV_V_AMD : StrEnumAttrCase<"AMD">; -def SPV_V_ARM : StrEnumAttrCase<"ARM">; -def SPV_V_Imagination : StrEnumAttrCase<"Imagination">; -def SPV_V_Intel : StrEnumAttrCase<"Intel">; -def SPV_V_NVIDIA : StrEnumAttrCase<"NVIDIA">; -def SPV_V_Qualcomm : StrEnumAttrCase<"Qualcomm">; -def SPV_V_SwiftShader : StrEnumAttrCase<"SwiftShader">; -def SPV_V_Unknown : StrEnumAttrCase<"Unknown">; - -def SPV_VendorAttr : SPV_StrEnumAttr< - "Vendor", "recognized SPIR-V vendor strings", [ +def SPV_V_AMD : I32EnumAttrCase<"AMD", 0>; +def SPV_V_ARM : I32EnumAttrCase<"ARM", 1>; +def SPV_V_Imagination : I32EnumAttrCase<"Imagination", 2>; +def SPV_V_Intel : I32EnumAttrCase<"Intel", 3>; +def SPV_V_NVIDIA : I32EnumAttrCase<"NVIDIA", 4>; +def SPV_V_Qualcomm : I32EnumAttrCase<"Qualcomm", 5>; +def SPV_V_SwiftShader : I32EnumAttrCase<"SwiftShader", 6>; +def SPV_V_Unknown : I32EnumAttrCase<"Unknown", 7>; + +def SPV_VendorAttr : SPV_EnumAttr< + "Vendor", "recognized SPIR-V vendor strings", "vendor", [ SPV_V_AMD, SPV_V_ARM, SPV_V_Imagination, SPV_V_Intel, SPV_V_NVIDIA, SPV_V_Qualcomm, SPV_V_SwiftShader, SPV_V_Unknown @@ -299,105 +298,105 @@ // Extensions known to the SPIR-V dialect. // https://github.com/KhronosGroup/SPIRV-Registry has the full list. 
-def SPV_KHR_16bit_storage : StrEnumAttrCase<"SPV_KHR_16bit_storage">; -def SPV_KHR_8bit_storage : StrEnumAttrCase<"SPV_KHR_8bit_storage">; -def SPV_KHR_device_group : StrEnumAttrCase<"SPV_KHR_device_group">; -def SPV_KHR_float_controls : StrEnumAttrCase<"SPV_KHR_float_controls">; -def SPV_KHR_physical_storage_buffer : StrEnumAttrCase<"SPV_KHR_physical_storage_buffer">; -def SPV_KHR_multiview : StrEnumAttrCase<"SPV_KHR_multiview">; -def SPV_KHR_no_integer_wrap_decoration : StrEnumAttrCase<"SPV_KHR_no_integer_wrap_decoration">; -def SPV_KHR_post_depth_coverage : StrEnumAttrCase<"SPV_KHR_post_depth_coverage">; -def SPV_KHR_shader_atomic_counter_ops : StrEnumAttrCase<"SPV_KHR_shader_atomic_counter_ops">; -def SPV_KHR_shader_ballot : StrEnumAttrCase<"SPV_KHR_shader_ballot">; -def SPV_KHR_shader_clock : StrEnumAttrCase<"SPV_KHR_shader_clock">; -def SPV_KHR_shader_draw_parameters : StrEnumAttrCase<"SPV_KHR_shader_draw_parameters">; -def SPV_KHR_storage_buffer_storage_class : StrEnumAttrCase<"SPV_KHR_storage_buffer_storage_class">; -def SPV_KHR_subgroup_vote : StrEnumAttrCase<"SPV_KHR_subgroup_vote">; -def SPV_KHR_variable_pointers : StrEnumAttrCase<"SPV_KHR_variable_pointers">; -def SPV_KHR_vulkan_memory_model : StrEnumAttrCase<"SPV_KHR_vulkan_memory_model">; -def SPV_KHR_expect_assume : StrEnumAttrCase<"SPV_KHR_expect_assume">; -def SPV_KHR_integer_dot_product : StrEnumAttrCase<"SPV_KHR_integer_dot_product">; -def SPV_KHR_bit_instructions : StrEnumAttrCase<"SPV_KHR_bit_instructions">; -def SPV_KHR_fragment_shading_rate : StrEnumAttrCase<"SPV_KHR_fragment_shading_rate">; -def SPV_KHR_workgroup_memory_explicit_layout : StrEnumAttrCase<"SPV_KHR_workgroup_memory_explicit_layout">; -def SPV_KHR_ray_query : StrEnumAttrCase<"SPV_KHR_ray_query">; -def SPV_KHR_ray_tracing : StrEnumAttrCase<"SPV_KHR_ray_tracing">; -def SPV_KHR_subgroup_uniform_control_flow : StrEnumAttrCase<"SPV_KHR_subgroup_uniform_control_flow">; -def SPV_KHR_linkonce_odr : StrEnumAttrCase<"SPV_KHR_linkonce_odr">; - -def SPV_EXT_demote_to_helper_invocation : StrEnumAttrCase<"SPV_EXT_demote_to_helper_invocation">; -def SPV_EXT_descriptor_indexing : StrEnumAttrCase<"SPV_EXT_descriptor_indexing">; -def SPV_EXT_fragment_fully_covered : StrEnumAttrCase<"SPV_EXT_fragment_fully_covered">; -def SPV_EXT_fragment_invocation_density : StrEnumAttrCase<"SPV_EXT_fragment_invocation_density">; -def SPV_EXT_fragment_shader_interlock : StrEnumAttrCase<"SPV_EXT_fragment_shader_interlock">; -def SPV_EXT_physical_storage_buffer : StrEnumAttrCase<"SPV_EXT_physical_storage_buffer">; -def SPV_EXT_shader_stencil_export : StrEnumAttrCase<"SPV_EXT_shader_stencil_export">; -def SPV_EXT_shader_viewport_index_layer : StrEnumAttrCase<"SPV_EXT_shader_viewport_index_layer">; -def SPV_EXT_shader_atomic_float_add : StrEnumAttrCase<"SPV_EXT_shader_atomic_float_add">; -def SPV_EXT_shader_atomic_float_min_max : StrEnumAttrCase<"SPV_EXT_shader_atomic_float_min_max">; -def SPV_EXT_shader_image_int64 : StrEnumAttrCase<"SPV_EXT_shader_image_int64">; -def SPV_EXT_shader_atomic_float16_add : StrEnumAttrCase<"SPV_EXT_shader_atomic_float16_add">; - -def SPV_AMD_gpu_shader_half_float_fetch : StrEnumAttrCase<"SPV_AMD_gpu_shader_half_float_fetch">; -def SPV_AMD_shader_ballot : StrEnumAttrCase<"SPV_AMD_shader_ballot">; -def SPV_AMD_shader_explicit_vertex_parameter : StrEnumAttrCase<"SPV_AMD_shader_explicit_vertex_parameter">; -def SPV_AMD_shader_fragment_mask : StrEnumAttrCase<"SPV_AMD_shader_fragment_mask">; -def SPV_AMD_shader_image_load_store_lod : 
StrEnumAttrCase<"SPV_AMD_shader_image_load_store_lod">; -def SPV_AMD_texture_gather_bias_lod : StrEnumAttrCase<"SPV_AMD_texture_gather_bias_lod">; - -def SPV_GOOGLE_decorate_string : StrEnumAttrCase<"SPV_GOOGLE_decorate_string">; -def SPV_GOOGLE_hlsl_functionality1 : StrEnumAttrCase<"SPV_GOOGLE_hlsl_functionality1">; -def SPV_GOOGLE_user_type : StrEnumAttrCase<"SPV_GOOGLE_user_type">; - -def SPV_INTEL_device_side_avc_motion_estimation : StrEnumAttrCase<"SPV_INTEL_device_side_avc_motion_estimation">; -def SPV_INTEL_media_block_io : StrEnumAttrCase<"SPV_INTEL_media_block_io">; -def SPV_INTEL_shader_integer_functions2 : StrEnumAttrCase<"SPV_INTEL_shader_integer_functions2">; -def SPV_INTEL_subgroups : StrEnumAttrCase<"SPV_INTEL_subgroups">; -def SPV_INTEL_float_controls2 : StrEnumAttrCase<"SPV_INTEL_float_controls2">; -def SPV_INTEL_function_pointers : StrEnumAttrCase<"SPV_INTEL_function_pointers">; -def SPV_INTEL_inline_assembly : StrEnumAttrCase<"SPV_INTEL_inline_assembly">; -def SPV_INTEL_vector_compute : StrEnumAttrCase<"SPV_INTEL_vector_compute">; -def SPV_INTEL_variable_length_array : StrEnumAttrCase<"SPV_INTEL_variable_length_array">; -def SPV_INTEL_fpga_memory_attributes : StrEnumAttrCase<"SPV_INTEL_fpga_memory_attributes">; -def SPV_INTEL_arbitrary_precision_integers : StrEnumAttrCase<"SPV_INTEL_arbitrary_precision_integers">; -def SPV_INTEL_arbitrary_precision_floating_point : StrEnumAttrCase<"SPV_INTEL_arbitrary_precision_floating_point">; -def SPV_INTEL_unstructured_loop_controls : StrEnumAttrCase<"SPV_INTEL_unstructured_loop_controls">; -def SPV_INTEL_fpga_loop_controls : StrEnumAttrCase<"SPV_INTEL_fpga_loop_controls">; -def SPV_INTEL_kernel_attributes : StrEnumAttrCase<"SPV_INTEL_kernel_attributes">; -def SPV_INTEL_fpga_memory_accesses : StrEnumAttrCase<"SPV_INTEL_fpga_memory_accesses">; -def SPV_INTEL_fpga_cluster_attributes : StrEnumAttrCase<"SPV_INTEL_fpga_cluster_attributes">; -def SPV_INTEL_loop_fuse : StrEnumAttrCase<"SPV_INTEL_loop_fuse">; -def SPV_INTEL_fpga_buffer_location : StrEnumAttrCase<"SPV_INTEL_fpga_buffer_location">; -def SPV_INTEL_arbitrary_precision_fixed_point : StrEnumAttrCase<"SPV_INTEL_arbitrary_precision_fixed_point">; -def SPV_INTEL_usm_storage_classes : StrEnumAttrCase<"SPV_INTEL_usm_storage_classes">; -def SPV_INTEL_io_pipes : StrEnumAttrCase<"SPV_INTEL_io_pipes">; -def SPV_INTEL_blocking_pipes : StrEnumAttrCase<"SPV_INTEL_blocking_pipes">; -def SPV_INTEL_fpga_reg : StrEnumAttrCase<"SPV_INTEL_fpga_reg">; -def SPV_INTEL_long_constant_composite : StrEnumAttrCase<"SPV_INTEL_long_constant_composite">; -def SPV_INTEL_optnone : StrEnumAttrCase<"SPV_INTEL_optnone">; -def SPV_INTEL_debug_module : StrEnumAttrCase<"SPV_INTEL_debug_module">; -def SPV_INTEL_fp_fast_math_mode : StrEnumAttrCase<"SPV_INTEL_fp_fast_math_mode">; - -def SPV_NV_compute_shader_derivatives : StrEnumAttrCase<"SPV_NV_compute_shader_derivatives">; -def SPV_NV_cooperative_matrix : StrEnumAttrCase<"SPV_NV_cooperative_matrix">; -def SPV_NV_fragment_shader_barycentric : StrEnumAttrCase<"SPV_NV_fragment_shader_barycentric">; -def SPV_NV_geometry_shader_passthrough : StrEnumAttrCase<"SPV_NV_geometry_shader_passthrough">; -def SPV_NV_mesh_shader : StrEnumAttrCase<"SPV_NV_mesh_shader">; -def SPV_NV_ray_tracing : StrEnumAttrCase<"SPV_NV_ray_tracing">; -def SPV_NV_sample_mask_override_coverage : StrEnumAttrCase<"SPV_NV_sample_mask_override_coverage">; -def SPV_NV_shader_image_footprint : StrEnumAttrCase<"SPV_NV_shader_image_footprint">; -def SPV_NV_shader_sm_builtins : 
StrEnumAttrCase<"SPV_NV_shader_sm_builtins">; -def SPV_NV_shader_subgroup_partitioned : StrEnumAttrCase<"SPV_NV_shader_subgroup_partitioned">; -def SPV_NV_shading_rate : StrEnumAttrCase<"SPV_NV_shading_rate">; -def SPV_NV_stereo_view_rendering : StrEnumAttrCase<"SPV_NV_stereo_view_rendering">; -def SPV_NV_viewport_array2 : StrEnumAttrCase<"SPV_NV_viewport_array2">; -def SPV_NV_bindless_texture : StrEnumAttrCase<"SPV_NV_bindless_texture">; -def SPV_NV_ray_tracing_motion_blur : StrEnumAttrCase<"SPV_NV_ray_tracing_motion_blur">; - -def SPV_NVX_multiview_per_view_attributes : StrEnumAttrCase<"SPV_NVX_multiview_per_view_attributes">; +def SPV_KHR_16bit_storage : I32EnumAttrCase<"SPV_KHR_16bit_storage", 0>; +def SPV_KHR_8bit_storage : I32EnumAttrCase<"SPV_KHR_8bit_storage", 1>; +def SPV_KHR_device_group : I32EnumAttrCase<"SPV_KHR_device_group", 2>; +def SPV_KHR_float_controls : I32EnumAttrCase<"SPV_KHR_float_controls", 3>; +def SPV_KHR_physical_storage_buffer : I32EnumAttrCase<"SPV_KHR_physical_storage_buffer", 4>; +def SPV_KHR_multiview : I32EnumAttrCase<"SPV_KHR_multiview", 5>; +def SPV_KHR_no_integer_wrap_decoration : I32EnumAttrCase<"SPV_KHR_no_integer_wrap_decoration", 6>; +def SPV_KHR_post_depth_coverage : I32EnumAttrCase<"SPV_KHR_post_depth_coverage", 7>; +def SPV_KHR_shader_atomic_counter_ops : I32EnumAttrCase<"SPV_KHR_shader_atomic_counter_ops", 8>; +def SPV_KHR_shader_ballot : I32EnumAttrCase<"SPV_KHR_shader_ballot", 9>; +def SPV_KHR_shader_clock : I32EnumAttrCase<"SPV_KHR_shader_clock", 10>; +def SPV_KHR_shader_draw_parameters : I32EnumAttrCase<"SPV_KHR_shader_draw_parameters", 11>; +def SPV_KHR_storage_buffer_storage_class : I32EnumAttrCase<"SPV_KHR_storage_buffer_storage_class", 12>; +def SPV_KHR_subgroup_vote : I32EnumAttrCase<"SPV_KHR_subgroup_vote", 13>; +def SPV_KHR_variable_pointers : I32EnumAttrCase<"SPV_KHR_variable_pointers", 14>; +def SPV_KHR_vulkan_memory_model : I32EnumAttrCase<"SPV_KHR_vulkan_memory_model", 15>; +def SPV_KHR_expect_assume : I32EnumAttrCase<"SPV_KHR_expect_assume", 16>; +def SPV_KHR_integer_dot_product : I32EnumAttrCase<"SPV_KHR_integer_dot_product", 17>; +def SPV_KHR_bit_instructions : I32EnumAttrCase<"SPV_KHR_bit_instructions", 18>; +def SPV_KHR_fragment_shading_rate : I32EnumAttrCase<"SPV_KHR_fragment_shading_rate", 19>; +def SPV_KHR_workgroup_memory_explicit_layout : I32EnumAttrCase<"SPV_KHR_workgroup_memory_explicit_layout", 20>; +def SPV_KHR_ray_query : I32EnumAttrCase<"SPV_KHR_ray_query", 21>; +def SPV_KHR_ray_tracing : I32EnumAttrCase<"SPV_KHR_ray_tracing", 22>; +def SPV_KHR_subgroup_uniform_control_flow : I32EnumAttrCase<"SPV_KHR_subgroup_uniform_control_flow", 23>; +def SPV_KHR_linkonce_odr : I32EnumAttrCase<"SPV_KHR_linkonce_odr", 24>; + +def SPV_EXT_demote_to_helper_invocation : I32EnumAttrCase<"SPV_EXT_demote_to_helper_invocation", 25>; +def SPV_EXT_descriptor_indexing : I32EnumAttrCase<"SPV_EXT_descriptor_indexing", 26>; +def SPV_EXT_fragment_fully_covered : I32EnumAttrCase<"SPV_EXT_fragment_fully_covered", 27>; +def SPV_EXT_fragment_invocation_density : I32EnumAttrCase<"SPV_EXT_fragment_invocation_density", 28>; +def SPV_EXT_fragment_shader_interlock : I32EnumAttrCase<"SPV_EXT_fragment_shader_interlock", 29>; +def SPV_EXT_physical_storage_buffer : I32EnumAttrCase<"SPV_EXT_physical_storage_buffer", 30>; +def SPV_EXT_shader_stencil_export : I32EnumAttrCase<"SPV_EXT_shader_stencil_export", 31>; +def SPV_EXT_shader_viewport_index_layer : I32EnumAttrCase<"SPV_EXT_shader_viewport_index_layer", 32>; +def SPV_EXT_shader_atomic_float_add : 
I32EnumAttrCase<"SPV_EXT_shader_atomic_float_add", 33>; +def SPV_EXT_shader_atomic_float_min_max : I32EnumAttrCase<"SPV_EXT_shader_atomic_float_min_max", 34>; +def SPV_EXT_shader_image_int64 : I32EnumAttrCase<"SPV_EXT_shader_image_int64", 35>; +def SPV_EXT_shader_atomic_float16_add : I32EnumAttrCase<"SPV_EXT_shader_atomic_float16_add", 36>; + +def SPV_AMD_gpu_shader_half_float_fetch : I32EnumAttrCase<"SPV_AMD_gpu_shader_half_float_fetch", 37>; +def SPV_AMD_shader_ballot : I32EnumAttrCase<"SPV_AMD_shader_ballot", 38>; +def SPV_AMD_shader_explicit_vertex_parameter : I32EnumAttrCase<"SPV_AMD_shader_explicit_vertex_parameter", 39>; +def SPV_AMD_shader_fragment_mask : I32EnumAttrCase<"SPV_AMD_shader_fragment_mask", 40>; +def SPV_AMD_shader_image_load_store_lod : I32EnumAttrCase<"SPV_AMD_shader_image_load_store_lod", 41>; +def SPV_AMD_texture_gather_bias_lod : I32EnumAttrCase<"SPV_AMD_texture_gather_bias_lod", 42>; + +def SPV_GOOGLE_decorate_string : I32EnumAttrCase<"SPV_GOOGLE_decorate_string", 43>; +def SPV_GOOGLE_hlsl_functionality1 : I32EnumAttrCase<"SPV_GOOGLE_hlsl_functionality1", 44>; +def SPV_GOOGLE_user_type : I32EnumAttrCase<"SPV_GOOGLE_user_type", 45>; + +def SPV_INTEL_device_side_avc_motion_estimation : I32EnumAttrCase<"SPV_INTEL_device_side_avc_motion_estimation", 46>; +def SPV_INTEL_media_block_io : I32EnumAttrCase<"SPV_INTEL_media_block_io", 47>; +def SPV_INTEL_shader_integer_functions2 : I32EnumAttrCase<"SPV_INTEL_shader_integer_functions2", 48>; +def SPV_INTEL_subgroups : I32EnumAttrCase<"SPV_INTEL_subgroups", 49>; +def SPV_INTEL_float_controls2 : I32EnumAttrCase<"SPV_INTEL_float_controls2", 50>; +def SPV_INTEL_function_pointers : I32EnumAttrCase<"SPV_INTEL_function_pointers", 51>; +def SPV_INTEL_inline_assembly : I32EnumAttrCase<"SPV_INTEL_inline_assembly", 52>; +def SPV_INTEL_vector_compute : I32EnumAttrCase<"SPV_INTEL_vector_compute", 53>; +def SPV_INTEL_variable_length_array : I32EnumAttrCase<"SPV_INTEL_variable_length_array", 54>; +def SPV_INTEL_fpga_memory_attributes : I32EnumAttrCase<"SPV_INTEL_fpga_memory_attributes", 55>; +def SPV_INTEL_arbitrary_precision_integers : I32EnumAttrCase<"SPV_INTEL_arbitrary_precision_integers", 56>; +def SPV_INTEL_arbitrary_precision_floating_point : I32EnumAttrCase<"SPV_INTEL_arbitrary_precision_floating_point", 57>; +def SPV_INTEL_unstructured_loop_controls : I32EnumAttrCase<"SPV_INTEL_unstructured_loop_controls", 58>; +def SPV_INTEL_fpga_loop_controls : I32EnumAttrCase<"SPV_INTEL_fpga_loop_controls", 59>; +def SPV_INTEL_kernel_attributes : I32EnumAttrCase<"SPV_INTEL_kernel_attributes", 60>; +def SPV_INTEL_fpga_memory_accesses : I32EnumAttrCase<"SPV_INTEL_fpga_memory_accesses", 61>; +def SPV_INTEL_fpga_cluster_attributes : I32EnumAttrCase<"SPV_INTEL_fpga_cluster_attributes", 62>; +def SPV_INTEL_loop_fuse : I32EnumAttrCase<"SPV_INTEL_loop_fuse", 63>; +def SPV_INTEL_fpga_buffer_location : I32EnumAttrCase<"SPV_INTEL_fpga_buffer_location", 64>; +def SPV_INTEL_arbitrary_precision_fixed_point : I32EnumAttrCase<"SPV_INTEL_arbitrary_precision_fixed_point", 65>; +def SPV_INTEL_usm_storage_classes : I32EnumAttrCase<"SPV_INTEL_usm_storage_classes", 66>; +def SPV_INTEL_io_pipes : I32EnumAttrCase<"SPV_INTEL_io_pipes", 67>; +def SPV_INTEL_blocking_pipes : I32EnumAttrCase<"SPV_INTEL_blocking_pipes", 68>; +def SPV_INTEL_fpga_reg : I32EnumAttrCase<"SPV_INTEL_fpga_reg", 69>; +def SPV_INTEL_long_constant_composite : I32EnumAttrCase<"SPV_INTEL_long_constant_composite", 70>; +def SPV_INTEL_optnone : I32EnumAttrCase<"SPV_INTEL_optnone", 71>; +def 
SPV_INTEL_debug_module : I32EnumAttrCase<"SPV_INTEL_debug_module", 72>; +def SPV_INTEL_fp_fast_math_mode : I32EnumAttrCase<"SPV_INTEL_fp_fast_math_mode", 73>; + +def SPV_NV_compute_shader_derivatives : I32EnumAttrCase<"SPV_NV_compute_shader_derivatives", 74>; +def SPV_NV_cooperative_matrix : I32EnumAttrCase<"SPV_NV_cooperative_matrix", 75>; +def SPV_NV_fragment_shader_barycentric : I32EnumAttrCase<"SPV_NV_fragment_shader_barycentric", 76>; +def SPV_NV_geometry_shader_passthrough : I32EnumAttrCase<"SPV_NV_geometry_shader_passthrough", 77>; +def SPV_NV_mesh_shader : I32EnumAttrCase<"SPV_NV_mesh_shader", 78>; +def SPV_NV_ray_tracing : I32EnumAttrCase<"SPV_NV_ray_tracing", 79>; +def SPV_NV_sample_mask_override_coverage : I32EnumAttrCase<"SPV_NV_sample_mask_override_coverage", 80>; +def SPV_NV_shader_image_footprint : I32EnumAttrCase<"SPV_NV_shader_image_footprint", 81>; +def SPV_NV_shader_sm_builtins : I32EnumAttrCase<"SPV_NV_shader_sm_builtins", 82>; +def SPV_NV_shader_subgroup_partitioned : I32EnumAttrCase<"SPV_NV_shader_subgroup_partitioned", 83>; +def SPV_NV_shading_rate : I32EnumAttrCase<"SPV_NV_shading_rate", 84>; +def SPV_NV_stereo_view_rendering : I32EnumAttrCase<"SPV_NV_stereo_view_rendering", 85>; +def SPV_NV_viewport_array2 : I32EnumAttrCase<"SPV_NV_viewport_array2", 86>; +def SPV_NV_bindless_texture : I32EnumAttrCase<"SPV_NV_bindless_texture", 87>; +def SPV_NV_ray_tracing_motion_blur : I32EnumAttrCase<"SPV_NV_ray_tracing_motion_blur", 88>; + +def SPV_NVX_multiview_per_view_attributes : I32EnumAttrCase<"SPV_NVX_multiview_per_view_attributes", 89>; def SPV_ExtensionAttr : - SPV_StrEnumAttr<"Extension", "supported SPIR-V extensions", [ + SPV_EnumAttr<"Extension", "supported SPIR-V extensions", "ext", [ SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_device_group, SPV_KHR_float_controls, SPV_KHR_physical_storage_buffer, SPV_KHR_multiview, SPV_KHR_no_integer_wrap_decoration, SPV_KHR_post_depth_coverage, diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVEnums.h b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVEnums.h --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVEnums.h +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVEnums.h @@ -24,7 +24,7 @@ namespace mlir { namespace spirv { enum class Version : uint32_t; -enum class Extension; +enum class Extension : uint32_t; enum class Capability : uint32_t; } // namespace spirv } // namespace mlir diff --git a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h --- a/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h +++ b/mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h @@ -23,17 +23,8 @@ template struct GPUIndexIntrinsicOpLowering : public ConvertOpToLLVMPattern { private: - enum Dimension { X = 0, Y = 1, Z = 2, invalid }; unsigned indexBitwidth; - static Dimension dimensionToIndex(Op op) { - return StringSwitch(op.dimension()) - .Case("x", X) - .Case("y", Y) - .Case("z", Z) - .Default(invalid); - } - public: explicit GPUIndexIntrinsicOpLowering(LLVMTypeConverter &typeConverter) : ConvertOpToLLVMPattern(typeConverter), @@ -46,18 +37,16 @@ auto loc = op->getLoc(); MLIRContext *context = rewriter.getContext(); Value newOp; - switch (dimensionToIndex(op)) { - case X: + switch (op.dimension()) { + case gpu::Dimension::x: newOp = rewriter.create(loc, IntegerType::get(context, 32)); break; - case Y: + case gpu::Dimension::y: newOp = rewriter.create(loc, IntegerType::get(context, 32)); break; - case Z: + case gpu::Dimension::z: newOp = rewriter.create(loc, 
IntegerType::get(context, 32)); break; - default: - return failure(); } if (indexBitwidth > 32) { diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -40,15 +40,15 @@ namespace { /// Convert gpu dialect shfl mode enum to the equivalent nvvm one. -static NVVM::ShflKind convertShflKind(gpu::ShuffleModeAttr mode) { +static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) { switch (mode) { - case gpu::ShuffleModeAttr::XOR: + case gpu::ShuffleMode::XOR: return NVVM::ShflKind::bfly; - case gpu::ShuffleModeAttr::UP: + case gpu::ShuffleMode::UP: return NVVM::ShflKind::up; - case gpu::ShuffleModeAttr::DOWN: + case gpu::ShuffleMode::DOWN: return NVVM::ShflKind::down; - case gpu::ShuffleModeAttr::IDX: + case gpu::ShuffleMode::IDX: return NVVM::ShflKind::idx; } llvm_unreachable("unknown shuffle mode"); diff --git a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp --- a/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp +++ b/mlir/lib/Conversion/GPUToSPIRV/GPUToSPIRV.cpp @@ -115,26 +115,10 @@ // Builtins. //===----------------------------------------------------------------------===// -static Optional getLaunchConfigIndex(Operation *op) { - auto dimAttr = op->getAttrOfType("dimension"); - if (!dimAttr) - return llvm::None; - - return llvm::StringSwitch>(dimAttr.getValue()) - .Case("x", 0) - .Case("y", 1) - .Case("z", 2) - .Default(llvm::None); -} - template LogicalResult LaunchConfigConversion::matchAndRewrite( SourceOp op, typename SourceOp::Adaptor adaptor, ConversionPatternRewriter &rewriter) const { - auto index = getLaunchConfigIndex(op); - if (!index) - return failure(); - auto *typeConverter = this->template getTypeConverter(); auto indexType = typeConverter->getIndexType(); @@ -143,7 +127,7 @@ spirv::getBuiltinVariableValue(op, builtin, indexType, rewriter); rewriter.replaceOpWithNewOp( op, indexType, spirvBuiltin, - rewriter.getI32ArrayAttr({index.getValue()})); + rewriter.getI32ArrayAttr({static_cast(op.dimension())})); return success(); } @@ -164,12 +148,9 @@ LogicalResult WorkGroupSizeConversion::matchAndRewrite( gpu::BlockDimOp op, OpAdaptor adaptor, ConversionPatternRewriter &rewriter) const { - auto index = getLaunchConfigIndex(op); - if (!index) - return failure(); - auto workGroupSizeAttr = spirv::lookupLocalWorkGroupSize(op); - auto val = workGroupSizeAttr.getValues()[index.getValue()]; + auto val = workGroupSizeAttr + .getValues()[static_cast(op.dimension())]; auto convertedType = getTypeConverter()->convertType(op.getResult().getType()); if (!convertedType) diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -27,6 +27,7 @@ DEPENDS MLIRGPUOpsIncGen + MLIRGPUOpsAttributesIncGen MLIRGPUOpsEnumsGen MLIRGPUOpInterfacesIncGen diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -124,6 +124,10 @@ #define GET_OP_LIST #include "mlir/Dialect/GPU/GPUOps.cpp.inc" >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/Dialect/GPU/GPUOpsAttributes.cpp.inc" + >(); addInterfaces(); } @@ -293,54 +297,36 @@ if (yieldCount == 0) return allReduce.emitError("expected gpu.yield op in region"); } else { - 
StringRef opName = *allReduce.op(); - if ((opName == "and" || opName == "or" || opName == "xor") && + gpu::AllReduceOperation opName = *allReduce.op(); + if ((opName == gpu::AllReduceOperation::AND || + opName == gpu::AllReduceOperation::OR || + opName == gpu::AllReduceOperation::XOR) && !allReduce.getType().isa()) { return allReduce.emitError() - << '`' << opName << '`' + << '`' << gpu::stringifyAllReduceOperation(opName) << '`' << " accumulator is only compatible with Integer type"; } } return success(); } -static LogicalResult verifyShuffleOp(gpu::ShuffleOp shuffleOp) { - auto type = shuffleOp.value().getType(); - if (shuffleOp.result().getType() != type) { - return shuffleOp.emitOpError() - << "requires the same type for value operand and result"; - } - if (!type.isSignlessIntOrFloat() || type.getIntOrFloatBitWidth() != 32) { - return shuffleOp.emitOpError() - << "requires value operand type to be f32 or i32"; +// TODO: Support optional custom attributes (without dialect prefix). +static ParseResult parseAllReduceOperation(AsmParser &parser, + AllReduceOperationAttr &attr) { + StringRef enumStr; + if (!parser.parseOptionalKeyword(&enumStr)) { + Optional op = gpu::symbolizeAllReduceOperation(enumStr); + if (!op) + return parser.emitError(parser.getCurrentLocation(), "invalid op kind"); + attr = AllReduceOperationAttr::get(parser.getContext(), *op); } return success(); } -static void printShuffleOp(OpAsmPrinter &p, ShuffleOp op) { - p << ' ' << op.getOperands() << ' ' << stringifyEnum(op.mode()) << " : " - << op.value().getType(); -} - -static ParseResult parseShuffleOp(OpAsmParser &parser, OperationState &state) { - SmallVector operandInfo; - if (parser.parseOperandList(operandInfo, 3)) - return failure(); - - StringRef mode; - if (parser.parseKeyword(&mode)) - return failure(); - state.addAttribute("mode", parser.getBuilder().getStringAttr(mode)); - - Type valueType; - Type int32Type = parser.getBuilder().getIntegerType(32); - Type int1Type = parser.getBuilder().getI1Type(); - if (parser.parseColonType(valueType) || - parser.resolveOperands(operandInfo, {valueType, int32Type, int32Type}, - parser.getCurrentLocation(), state.operands) || - parser.addTypesToList({valueType, int1Type}, state.types)) - return failure(); - return success(); +static void printAllReduceOperation(AsmPrinter &printer, Operation *op, + AllReduceOperationAttr attr) { + if (attr) + attr.print(printer); } //===----------------------------------------------------------------------===// @@ -1203,5 +1189,8 @@ #include "mlir/Dialect/GPU/GPUOpInterfaces.cpp.inc" #include "mlir/Dialect/GPU/GPUOpsEnums.cpp.inc" +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/GPU/GPUOpsAttributes.cpp.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/GPU/GPUOps.cpp.inc" diff --git a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp --- a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp @@ -66,12 +66,12 @@ rewriter.setInsertionPoint(reduceOp); // Compute linear invocation index and workgroup size. 
- Value dimX = getDimOp("x"); - Value dimY = getDimOp("y"); - Value dimZ = getDimOp("z"); - Value tidX = getDimOp("x"); - Value tidY = getDimOp("y"); - Value tidZ = getDimOp("z"); + Value dimX = getDimOp(gpu::Dimension::x); + Value dimY = getDimOp(gpu::Dimension::y); + Value dimZ = getDimOp(gpu::Dimension::z); + Value tidX = getDimOp(gpu::Dimension::x); + Value tidY = getDimOp(gpu::Dimension::y); + Value tidZ = getDimOp(gpu::Dimension::z); Value tmp1 = create(int32Type, tidZ, dimY); Value tmp2 = create(int32Type, tmp1, tidY); Value tmp3 = create(int32Type, tmp2, dimX); @@ -150,8 +150,8 @@ // Creates dimension op of type T, with the result casted to int32. template - Value getDimOp(StringRef dimension) { - Value dim = create(indexType, rewriter.getStringAttr(dimension)); + Value getDimOp(gpu::Dimension dimension) { + Value dim = create(indexType, dimension); return create(int32Type, dim); } @@ -212,38 +212,34 @@ } /// Returns an accumulator factory that creates an op specified by opName. - AccumulatorFactory getFactory(StringRef opName) { + AccumulatorFactory getFactory(gpu::AllReduceOperation opName) { bool isFloatingPoint = valueType.isa(); - if (opName == "add") + switch (opName) { + case gpu::AllReduceOperation::ADD: return isFloatingPoint ? getFactory() : getFactory(); - if (opName == "mul") + case gpu::AllReduceOperation::MUL: return isFloatingPoint ? getFactory() : getFactory(); - if (opName == "and") { + case gpu::AllReduceOperation::AND: return getFactory(); - } - if (opName == "or") { + case gpu::AllReduceOperation::OR: return getFactory(); - } - if (opName == "xor") { + case gpu::AllReduceOperation::XOR: return getFactory(); - } - if (opName == "max") { + case gpu::AllReduceOperation::MAX: return isFloatingPoint ? getCmpFactory() : getCmpFactory(); - } - if (opName == "min") { + case gpu::AllReduceOperation::MIN: return isFloatingPoint ? getCmpFactory() : getCmpFactory(); } - return AccumulatorFactory(); } /// Returns an accumulator factory that creates an op of type T. @@ -328,7 +324,6 @@ Value isPartialSubgroup = create(arith::CmpIPredicate::slt, activeWidth, subgroupSize); std::array shuffleType = {valueType, rewriter.getI1Type()}; - auto xorAttr = rewriter.getStringAttr("xor"); createIf( isPartialSubgroup, @@ -340,8 +335,8 @@ // in the first lane. for (int i = 1; i < kSubgroupSize; i <<= 1) { Value offset = create(i, int32Type); - auto shuffleOp = create(shuffleType, value, offset, - activeWidth, xorAttr); + auto shuffleOp = create( + shuffleType, value, offset, activeWidth, gpu::ShuffleMode::XOR); // Skip the accumulation if the shuffle op read from a lane outside // of the active range. 
createIf( @@ -362,8 +357,9 @@ Value value = operand; for (int i = 1; i < kSubgroupSize; i <<= 1) { Value offset = create(i, int32Type); - auto shuffleOp = create(shuffleType, value, offset, - subgroupSize, xorAttr); + auto shuffleOp = + create(shuffleType, value, offset, subgroupSize, + gpu::ShuffleMode::XOR); value = accumFactory(value, shuffleOp.getResult(0)); } return SmallVector{value}; diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -30,11 +30,8 @@ template static void createForAllDimensions(OpBuilder &builder, Location loc, SmallVectorImpl &values) { - for (StringRef dim : {"x", "y", "z"}) { - Value v = builder.create(loc, builder.getIndexType(), - builder.getStringAttr(dim)); - values.push_back(v); - } + for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) + values.push_back(builder.create(loc, builder.getIndexType(), dim)); } /// Adds operations generating block/thread ids and grid/block dimensions at the diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp --- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp @@ -24,18 +24,6 @@ using namespace mlir; using namespace mlir::gpu; -/// Returns the textual name of a GPU dimension. -static StringRef getDimName(unsigned dim) { - if (dim == 0) - return "x"; - if (dim == 1) - return "y"; - if (dim == 2) - return "z"; - - llvm_unreachable("dimension ID overflow"); -} - /// Emits the (imperfect) loop nest performing the copy between "from" and "to" /// values using the bounds derived from the "from" value. Emits at least /// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with @@ -71,10 +59,9 @@ // Obtain thread identifiers and block sizes, necessary to map to them. auto indexType = b.getIndexType(); SmallVector threadIds, blockDims; - for (unsigned i = 0; i < 3; ++i) { - auto dimName = b.getStringAttr(getDimName(i)); - threadIds.push_back(b.create(indexType, dimName)); - blockDims.push_back(b.create(indexType, dimName)); + for (auto dim : {gpu::Dimension::x, gpu::Dimension::y, gpu::Dimension::z}) { + threadIds.push_back(b.create(indexType, dim)); + blockDims.push_back(b.create(indexType, dim)); } // Produce the loop nest with copies. diff --git a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp --- a/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp @@ -18,9 +18,11 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Operation.h" #include "mlir/IR/OperationSupport.h" +#include "llvm/ADT/TypeSwitch.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -213,6 +215,10 @@ #define GET_OP_LIST #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc" >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/Dialect/LLVMIR/NVVMOpsAttributes.cpp.inc" + >(); // Support unknown operations because not all NVVM operations are // registered. 
@@ -233,3 +239,6 @@ #define GET_OP_CLASSES #include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc" + +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/LLVMIR/NVVMOpsAttributes.cpp.inc" diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -12,8 +12,10 @@ #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/TypeSwitch.h" using namespace mlir; using namespace acc; @@ -29,6 +31,10 @@ #define GET_OP_LIST #include "mlir/Dialect/OpenACC/OpenACCOps.cpp.inc" >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/Dialect/OpenACC/OpenACCOpsAttributes.cpp.inc" + >(); } template @@ -894,3 +900,6 @@ #define GET_OP_CLASSES #include "mlir/Dialect/OpenACC/OpenACCOps.cpp.inc" + +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/OpenACC/OpenACCOpsAttributes.cpp.inc" diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Attributes.h" +#include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/OperationSupport.h" @@ -22,6 +23,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/TypeSwitch.h" #include #include "mlir/Dialect/OpenMP/OpenMPOpsDialect.cpp.inc" @@ -47,6 +49,10 @@ #define GET_OP_LIST #include "mlir/Dialect/OpenMP/OpenMPOps.cpp.inc" >(); + addAttributes< +#define GET_ATTRDEF_LIST +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc" + >(); LLVM::LLVMPointerType::attachInterface< PointerLikeModel>(*getContext()); @@ -184,10 +190,10 @@ printAllocateAndAllocator(p, op.allocate_vars(), op.allocators_vars()); if (auto def = op.default_val()) - p << "default(" << def->drop_front(3) << ") "; + p << "default(" << stringifyClauseDefault(*def).drop_front(3) << ") "; if (auto bind = op.proc_bind_val()) - p << "proc_bind(" << bind << ") "; + p << "proc_bind(" << stringifyClauseProcBindKind(*bind) << ") "; p.printRegion(op.getRegion()); } @@ -261,19 +267,15 @@ // If we have one modifier that is "simd", then stick a "none" modiifer in // index 0. 
if (modifiers.size() == 1) { - if (symbolizeScheduleModifier(modifiers[0]) == - mlir::omp::ScheduleModifier::simd) { + if (symbolizeScheduleModifier(modifiers[0]) == ScheduleModifier::simd) { modifiers.push_back(modifiers[0]); - modifiers[0] = - stringifyScheduleModifier(mlir::omp::ScheduleModifier::none); + modifiers[0] = stringifyScheduleModifier(ScheduleModifier::none); } } else if (modifiers.size() == 2) { // If there are two modifiers: // First modifier should not be simd, second one should be simd - if (symbolizeScheduleModifier(modifiers[0]) == - mlir::omp::ScheduleModifier::simd || - symbolizeScheduleModifier(modifiers[1]) != - mlir::omp::ScheduleModifier::simd) + if (symbolizeScheduleModifier(modifiers[0]) == ScheduleModifier::simd || + symbolizeScheduleModifier(modifiers[1]) != ScheduleModifier::simd) return parser.emitError(parser.getNameLoc()) << " incorrect modifier order"; } @@ -333,15 +335,14 @@ } /// Print schedule clause -static void printScheduleClause(OpAsmPrinter &p, StringRef &sched, - llvm::Optional modifier, bool simd, +static void printScheduleClause(OpAsmPrinter &p, ClauseScheduleKind sched, + Optional<ScheduleModifier> modifier, bool simd, Value scheduleChunkVar) { - std::string schedLower = sched.lower(); - p << "schedule(" << schedLower; + p << "schedule(" << stringifyClauseScheduleKind(sched).lower(); if (scheduleChunkVar) p << " = " << scheduleChunkVar; - if (modifier && modifier.hasValue()) - p << ", " << modifier; + if (modifier) + p << ", " << stringifyScheduleModifier(*modifier); if (simd) p << ", simd"; p << ") "; @@ -545,6 +546,24 @@ // Parser for Clause List //===----------------------------------------------------------------------===// +/// Parse a clause attribute `(` $value `)`. +template <typename ClauseAttr> +static ParseResult parseClauseAttr(AsmParser &parser, OperationState &state, + StringRef attrName, StringRef name) { + using ClauseT = decltype(std::declval<ClauseAttr>().getValue()); + StringRef enumStr; + llvm::SMLoc loc = parser.getCurrentLocation(); + if (parser.parseLParen() || parser.parseKeyword(&enumStr) || + parser.parseRParen()) + return failure(); + if (Optional<ClauseT> enumValue = symbolizeEnum<ClauseT>(enumStr)) { + auto attr = ClauseAttr::get(parser.getContext(), *enumValue); + state.addAttribute(attrName, attr); + return success(); + } + return parser.emitError(loc, "invalid ") << name << " kind"; +} + /// Parse a list of clauses. The clauses can appear in any order, but their /// operand segment indices are in the same order that they are passed in the /// `clauses` list.
The operand segments are added over the prevSegments @@ -634,13 +653,12 @@ SmallVector clauseSegments(currPos); // Helper function to check if a clause is allowed/repeated or not - auto checkAllowed = [&](ClauseType clause, - bool allowRepeat = false) -> ParseResult { + auto checkAllowed = [&](ClauseType clause) -> ParseResult { if (!llvm::is_contained(clauses, clause)) return parser.emitError(parser.getCurrentLocation()) << clauseKeyword << " is not a valid clause for the " << opName << " operation"; - if (done[clause] && !allowRepeat) + if (done[clause]) return parser.emitError(parser.getCurrentLocation()) << "at most one " << clauseKeyword << " clause can appear on the " << opName << " operation"; @@ -695,20 +713,24 @@ clauseSegments[pos[allocateClause] + 1] = allocators.size(); } else if (clauseKeyword == "default") { StringRef defval; + llvm::SMLoc loc = parser.getCurrentLocation(); if (checkAllowed(defaultClause) || parser.parseLParen() || parser.parseKeyword(&defval) || parser.parseRParen()) return failure(); // The def prefix is required for the attribute as "private" is a keyword // in C++. - auto attr = parser.getBuilder().getStringAttr("def" + defval); - result.addAttribute("default_val", attr); + if (Optional def = + symbolizeClauseDefault(("def" + defval).str())) { + result.addAttribute("default_val", + ClauseDefaultAttr::get(parser.getContext(), *def)); + } else { + return parser.emitError(loc, "invalid default clause"); + } } else if (clauseKeyword == "proc_bind") { - StringRef bind; - if (checkAllowed(procBindClause) || parser.parseLParen() || - parser.parseKeyword(&bind) || parser.parseRParen()) + if (checkAllowed(procBindClause) || + parseClauseAttr(parser, result, + "proc_bind_val", "proc bind")) return failure(); - auto attr = parser.getBuilder().getStringAttr(bind); - result.addAttribute("proc_bind_val", attr); } else if (clauseKeyword == "reduction") { if (checkAllowed(reductionClause) || parseReductionVarList(parser, reductionSymbols, reductionVars, @@ -754,19 +776,15 @@ } result.addAttribute("ordered_val", attr); } else if (clauseKeyword == "order") { - StringRef order; - if (checkAllowed(orderClause) || parser.parseLParen() || - parser.parseKeyword(&order) || parser.parseRParen()) + if (checkAllowed(orderClause) || + parseClauseAttr(parser, result, "order_val", + "order")) return failure(); - auto attr = parser.getBuilder().getStringAttr(order); - result.addAttribute("order_val", attr); } else if (clauseKeyword == "memory_order") { - StringRef memoryOrder; - if (checkAllowed(memoryOrderClause) || parser.parseLParen() || - parser.parseKeyword(&memoryOrder) || parser.parseRParen()) + if (checkAllowed(memoryOrderClause) || + parseClauseAttr( + parser, result, "memory_order", "memory order")) return failure(); - result.addAttribute("memory_order", - parser.getBuilder().getStringAttr(memoryOrder)); } else if (clauseKeyword == "hint") { IntegerAttr hint; if (checkAllowed(hintClause) || @@ -861,15 +879,28 @@ // Add schedule parameters if (done[scheduleClause] && !schedule.empty()) { schedule[0] = llvm::toUpper(schedule[0]); - auto attr = parser.getBuilder().getStringAttr(schedule); - result.addAttribute("schedule_val", attr); + if (Optional sched = + symbolizeClauseScheduleKind(schedule)) { + auto attr = ClauseScheduleKindAttr::get(parser.getContext(), *sched); + result.addAttribute("schedule_val", attr); + } else { + return parser.emitError(parser.getCurrentLocation(), + "invalid schedule kind"); + } if (!modifiers.empty()) { - auto mod = 
parser.getBuilder().getStringAttr(modifiers[0]); - result.addAttribute("schedule_modifier", mod); + llvm::SMLoc loc = parser.getCurrentLocation(); + if (Optional mod = + symbolizeScheduleModifier(modifiers[0])) { + result.addAttribute( + "schedule_modifier", + ScheduleModifierAttr::get(parser.getContext(), *mod)); + } else { + return parser.emitError(loc, "invalid schedule modifier"); + } // Only SIMD attribute is allowed here! if (modifiers.size() > 1) { assert(symbolizeScheduleModifier(modifiers[1]) == - mlir::omp::ScheduleModifier::simd); + ScheduleModifier::simd); auto attr = UnitAttr::get(parser.getBuilder().getContext()); result.addAttribute("simd_modifier", attr); } @@ -1089,7 +1120,7 @@ p << "ordered(" << ordered << ") "; if (auto order = op.order_val()) - p << "order(" << order << ") "; + p << "order(" << stringifyClauseOrderKind(*order) << ") "; if (!op.reduction_vars().empty()) printReductionVarList(p, op.reductions(), op.reduction_vars()); @@ -1351,8 +1382,8 @@ /// Printer for AtomicReadOp static void printAtomicReadOp(OpAsmPrinter &p, AtomicReadOp op) { p << " " << op.v() << " = " << op.x() << " "; - if (op.memory_order()) - p << "memory_order(" << op.memory_order().getValue() << ") "; + if (auto mo = op.memory_order()) + p << "memory_order(" << stringifyClauseMemoryOrderKind(*mo) << ") "; if (op.hintAttr()) printSynchronizationHint(p << " ", op, op.hintAttr()); p << ": " << op.x().getType(); @@ -1360,11 +1391,12 @@ /// Verifier for AtomicReadOp static LogicalResult verifyAtomicReadOp(AtomicReadOp op) { - if (op.memory_order()) { - StringRef memOrder = op.memory_order().getValue(); - if (memOrder.equals("acq_rel") || memOrder.equals("release")) + if (auto mo = op.memory_order()) { + if (*mo == ClauseMemoryOrderKind::acq_rel || + *mo == ClauseMemoryOrderKind::release) { return op.emitError( "memory-order must not be acq_rel or release for atomic reads"); + } } if (op.x() == op.v()) return op.emitError( @@ -1403,8 +1435,8 @@ /// Printer for AtomicWriteOp static void printAtomicWriteOp(OpAsmPrinter &p, AtomicWriteOp op) { p << " " << op.address() << " = " << op.value() << " "; - if (op.memory_order()) - p << "memory_order(" << op.memory_order() << ") "; + if (auto mo = op.memory_order()) + p << "memory_order(" << stringifyClauseMemoryOrderKind(*mo) << ") "; if (op.hintAttr()) printSynchronizationHint(p, op, op.hintAttr()); p << ": " << op.address().getType() << ", " << op.value().getType(); @@ -1412,11 +1444,12 @@ /// Verifier for AtomicWriteOp static LogicalResult verifyAtomicWriteOp(AtomicWriteOp op) { - if (op.memory_order()) { - StringRef memoryOrder = op.memory_order().getValue(); - if (memoryOrder.equals("acq_rel") || memoryOrder.equals("acquire")) + if (auto mo = op.memory_order()) { + if (*mo == ClauseMemoryOrderKind::acq_rel || + *mo == ClauseMemoryOrderKind::acquire) { return op.emitError( "memory-order must not be acq_rel or acquire for atomic writes"); + } } return verifySynchronizationHint(op, op.hint()); } @@ -1482,8 +1515,8 @@ } p << y << " " << AtomicBinOpKindToString(op.binop()).lower() << " " << z << " "; - if (op.memory_order()) - p << "memory_order(" << op.memory_order() << ") "; + if (auto mo = op.memory_order()) + p << "memory_order(" << stringifyClauseMemoryOrderKind(*mo) << ") "; if (op.hintAttr()) printSynchronizationHint(p, op, op.hintAttr()); p << ": " << op.x().getType() << ", " << op.expr().getType(); @@ -1491,14 +1524,18 @@ /// Verifier for AtomicUpdateOp static LogicalResult verifyAtomicUpdateOp(AtomicUpdateOp op) { - if (op.memory_order()) { - 
StringRef memoryOrder = op.memory_order().getValue(); - if (memoryOrder.equals("acq_rel") || memoryOrder.equals("acquire")) + if (auto mo = op.memory_order()) { + if (*mo == ClauseMemoryOrderKind::acq_rel || + *mo == ClauseMemoryOrderKind::acquire) { return op.emitError( "memory-order must not be acq_rel or acquire for atomic updates"); + } } return success(); } +#define GET_ATTRDEF_CLASSES +#include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc" + #define GET_OP_CLASSES #include "mlir/Dialect/OpenMP/OpenMPOps.cpp.inc" diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -186,9 +186,23 @@ moduleTranslation.forgetMapping(region); } +/// Convert ProcBindKind from MLIR-generated enum to LLVM enum. +static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) { + switch (kind) { + case omp::ClauseProcBindKind::close: + return llvm::omp::ProcBindKind::OMP_PROC_BIND_close; + case omp::ClauseProcBindKind::master: + return llvm::omp::ProcBindKind::OMP_PROC_BIND_master; + case omp::ClauseProcBindKind::primary: + return llvm::omp::ProcBindKind::OMP_PROC_BIND_primary; + case omp::ClauseProcBindKind::spread: + return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread; + } +} + /// Converts the OpenMP parallel operation to LLVM IR. static LogicalResult -convertOmpParallel(Operation &opInst, llvm::IRBuilderBase &builder, +convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; // TODO: support error propagation in OpenMPIRBuilder and use it instead of @@ -203,10 +217,9 @@ moduleTranslation, allocaIP); // ParallelOp has only one region associated with it. - auto ®ion = cast(opInst).getRegion(); - convertOmpOpRegions(region, "omp.par.region", *codeGenIP.getBlock(), - continuationBlock, builder, moduleTranslation, - bodyGenStatus); + convertOmpOpRegions(opInst.getRegion(), "omp.par.region", + *codeGenIP.getBlock(), continuationBlock, builder, + moduleTranslation, bodyGenStatus); }; // TODO: Perform appropriate actions according to the data-sharing @@ -225,14 +238,14 @@ auto finiCB = [&](InsertPointTy codeGenIP) {}; llvm::Value *ifCond = nullptr; - if (auto ifExprVar = cast(opInst).if_expr_var()) + if (auto ifExprVar = opInst.if_expr_var()) ifCond = moduleTranslation.lookupValue(ifExprVar); llvm::Value *numThreads = nullptr; - if (auto numThreadsVar = cast(opInst).num_threads_var()) + if (auto numThreadsVar = opInst.num_threads_var()) numThreads = moduleTranslation.lookupValue(numThreadsVar); - llvm::omp::ProcBindKind pbKind = llvm::omp::OMP_PROC_BIND_default; - if (auto bind = cast(opInst).proc_bind_val()) - pbKind = llvm::omp::getProcBindKind(bind.getValue()); + auto pbKind = llvm::omp::OMP_PROC_BIND_default; + if (auto bind = opInst.proc_bind_val()) + pbKind = getProcBindKind(*bind); // TODO: Is the Parallel construct cancellable? 
bool isCancellable = false; @@ -486,8 +499,7 @@ LLVM::ModuleTranslation &moduleTranslation) { auto orderedOp = cast(opInst); - omp::ClauseDepend dependType = - *omp::symbolizeClauseDepend(orderedOp.depend_type_valAttr().getValue()); + omp::ClauseDepend dependType = *orderedOp.depend_type_val(); bool isDependSource = dependType == omp::ClauseDepend::dependsource; unsigned numLoops = orderedOp.num_loops_val().getValue(); SmallVector vecValues = @@ -628,10 +640,8 @@ return failure(); // Static is the default. - omp::ClauseScheduleKind schedule = omp::ClauseScheduleKind::Static; - if (loop.schedule_val().hasValue()) - schedule = - *omp::symbolizeClauseScheduleKind(loop.schedule_val().getValue()); + auto schedule = + loop.schedule_val().getValueOr(omp::ClauseScheduleKind::Static); // Find the loop configuration. llvm::Value *step = moduleTranslation.lookupValue(loop.step()[0]); @@ -788,10 +798,8 @@ break; } - if (loop.schedule_modifier().hasValue()) { - omp::ScheduleModifier modifier = - *omp::symbolizeScheduleModifier(loop.schedule_modifier().getValue()); - switch (modifier) { + if (Optional<omp::ScheduleModifier> modifier = loop.schedule_modifier()) { + switch (*modifier) { case omp::ScheduleModifier::monotonic: schedType |= llvm::omp::OMPScheduleType::ModifierMonotonic; break; @@ -861,17 +869,23 @@ } // Convert an Atomic Ordering attribute to llvm::AtomicOrdering. -llvm::AtomicOrdering convertAtomicOrdering(Optional<StringRef> aoAttr) { - if (!aoAttr.hasValue()) +llvm::AtomicOrdering +convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) { + if (!ao) return llvm::AtomicOrdering::Monotonic; // Default Memory Ordering - return StringSwitch<llvm::AtomicOrdering>(aoAttr.getValue()) - .Case("seq_cst", llvm::AtomicOrdering::SequentiallyConsistent) - .Case("acq_rel", llvm::AtomicOrdering::AcquireRelease) - .Case("acquire", llvm::AtomicOrdering::Acquire) - .Case("release", llvm::AtomicOrdering::Release) - .Case("relaxed", llvm::AtomicOrdering::Monotonic) - .Default(llvm::AtomicOrdering::Monotonic); + switch (*ao) { + case omp::ClauseMemoryOrderKind::seq_cst: + return llvm::AtomicOrdering::SequentiallyConsistent; + case omp::ClauseMemoryOrderKind::acq_rel: + return llvm::AtomicOrdering::AcquireRelease; + case omp::ClauseMemoryOrderKind::acquire: + return llvm::AtomicOrdering::Acquire; + case omp::ClauseMemoryOrderKind::release: + return llvm::AtomicOrdering::Release; + case omp::ClauseMemoryOrderKind::relaxed: + return llvm::AtomicOrdering::Monotonic; + } } // Convert omp.atomic.read operation to LLVM IR.
@@ -1021,8 +1035,8 @@ ompBuilder->createFlush(builder.saveIP()); return success(); }) - .Case([&](omp::ParallelOp) { - return convertOmpParallel(*op, builder, moduleTranslation); + .Case([&](omp::ParallelOp op) { + return convertOmpParallel(op, builder, moduleTranslation); }) .Case([&](omp::ReductionOp reductionOp) { return convertOmpReductionOp(reductionOp, builder, moduleTranslation); diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -11,43 +11,43 @@ // CHECK: = nvvm.read.ptx.sreg.tid.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) + %tIdX = gpu.thread_id x // CHECK: = nvvm.read.ptx.sreg.tid.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index) + %tIdY = gpu.thread_id y // CHECK: = nvvm.read.ptx.sreg.tid.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index) + %tIdZ = gpu.thread_id z // CHECK: = nvvm.read.ptx.sreg.ntid.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index) + %bDimX = gpu.block_dim x // CHECK: = nvvm.read.ptx.sreg.ntid.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index) + %bDimY = gpu.block_dim y // CHECK: = nvvm.read.ptx.sreg.ntid.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index) + %bDimZ = gpu.block_dim z // CHECK: = nvvm.read.ptx.sreg.ctaid.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index) + %bIdX = gpu.block_id x // CHECK: = nvvm.read.ptx.sreg.ctaid.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index) + %bIdY = gpu.block_id y // CHECK: = nvvm.read.ptx.sreg.ctaid.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index) + %bIdZ = gpu.block_id z // CHECK: = nvvm.read.ptx.sreg.nctaid.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index) + %gDimX = gpu.grid_dim x // CHECK: = nvvm.read.ptx.sreg.nctaid.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) + %gDimY = gpu.grid_dim y // CHECK: = nvvm.read.ptx.sreg.nctaid.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %gDimZ = gpu.grid_dim z std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ, %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ @@ -78,10 +78,10 @@ gpu.func @gpu_all_reduce_op() { %arg0 = arith.constant 1.0 : f32 // TODO: Check full IR expansion once lowering has settled. - // CHECK: nvvm.shfl.sync "bfly" {{.*}} + // CHECK: nvvm.shfl.sync bfly {{.*}} // CHECK: nvvm.barrier0 // CHECK: llvm.fadd - %result = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32) + %result = gpu.all_reduce add %arg0 {} : (f32) -> (f32) gpu.return } @@ -94,13 +94,13 @@ gpu.func @gpu_all_reduce_region() { %arg0 = arith.constant 1 : i32 // TODO: Check full IR expansion once lowering has settled. 
- // CHECK: nvvm.shfl.sync "bfly" {{.*}} + // CHECK: nvvm.shfl.sync bfly {{.*}} // CHECK: nvvm.barrier0 - %result = "gpu.all_reduce"(%arg0) ({ + %result = gpu.all_reduce %arg0 { ^bb(%lhs : i32, %rhs : i32): %xor = arith.xori %lhs, %rhs : i32 "gpu.yield"(%xor) : (i32) -> () - }) : (i32) -> (i32) + } : (i32) -> (i32) gpu.return } } @@ -120,16 +120,16 @@ // CHECK: %[[#SHL:]] = llvm.shl %[[#ONE]], %[[#WIDTH]] : i32 // CHECK: %[[#MASK:]] = llvm.sub %[[#SHL]], %[[#ONE]] : i32 // CHECK: %[[#CLAMP:]] = llvm.sub %[[#WIDTH]], %[[#ONE]] : i32 - // CHECK: %[[#SHFL:]] = nvvm.shfl.sync "bfly" %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: %[[#SHFL:]] = nvvm.shfl.sync bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm.struct<(f32, i1)> // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm.struct<(f32, i1)> - %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (f32, i1) - // CHECK: nvvm.shfl.sync "up" {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> - %shflu, %predu = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "up" } : (f32, i32, i32) -> (f32, i1) - // CHECK: nvvm.shfl.sync "down" {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> - %shfld, %predd = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "down" } : (f32, i32, i32) -> (f32, i1) - // CHECK: nvvm.shfl.sync "idx" {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> - %shfli, %predi = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "idx" } : (f32, i32, i32) -> (f32, i1) + %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : f32 + // CHECK: nvvm.shfl.sync up {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %shflu, %predu = gpu.shuffle up %arg0, %arg1, %arg2 : f32 + // CHECK: nvvm.shfl.sync down {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %shfld, %predd = gpu.shuffle down %arg0, %arg1, %arg2 : f32 + // CHECK: nvvm.shfl.sync idx {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %shfli, %predi = gpu.shuffle idx %arg0, %arg1, %arg2 : f32 std.return %shfl, %shflu, %shfld, %shfli : f32, f32,f32, f32 } diff --git a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir --- a/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/wmma-ops-to-nvvm.mlir @@ -20,7 +20,7 @@ // CHECK: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK: %[[FRAG:.*]] = nvvm.wmma.load %[[ADDRESS]], %[[LDM32]] - // CHECK-SAME: {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK-SAME: {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[FRAG]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: %[[INX:.*]] = 
llvm.mlir.constant(16 : index) : i32 @@ -32,7 +32,7 @@ // CHECK32: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr, i32) -> !llvm.ptr // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK32: %[[FRAG:.*]] = nvvm.wmma.load %[[ADDRESS]], %[[LDM32]] - // CHECK32-SAME: {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + // CHECK32-SAME: {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK32: llvm.return %[[FRAG]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> return %0 : !gpu.mma_matrix<16x16xf16, "AOp"> } @@ -64,7 +64,7 @@ // CHECK: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr, i64) -> !llvm.ptr // CHECK: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK: nvvm.wmma.store %[[ADDRESS]], %[[LDM32]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]] - // CHECK-SAME: {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> + // CHECK-SAME: {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> // CHECK: llvm.return // CHECK32: %[[INX:.*]] = llvm.mlir.constant(16 : index) : i32 @@ -80,7 +80,7 @@ // CHECK32: %[[ADDRESS:.*]] = llvm.getelementptr %[[BASE]][%[[LIJ]]] : (!llvm.ptr, i32) -> !llvm.ptr // CHECK32: %[[LDM32:.*]] = llvm.mlir.constant(32 : index) : i32 // CHECK32: nvvm.wmma.store %[[ADDRESS]], %[[LDM32]], %[[EL1]], %[[EL2]], %[[EL3]], %[[EL4]] - // CHECK32-SAME: {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> + // CHECK32-SAME: {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> // CHECK32: llvm.return return } @@ -115,7 +115,7 @@ // CHECK: %[[C3:.*]] = llvm.extractvalue %[[C]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[C4:.*]] = llvm.extractvalue %[[C]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[RES:.*]] = nvvm.wmma.mma %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[A8]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[B8]], %[[C1]], %[[C2]], %[[C3]], %[[C4]] - // CHECK-SAME: {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} : ( + // CHECK-SAME: {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : ( // CHECK-SAME: vector<2xf16>, {{.*}}) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[RES]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> return %D : !gpu.mma_matrix<16x16xf16, "COp"> @@ -127,13 +127,13 @@ gpu.module @test_module { // 
CHECK-LABEL: func @gpu_wmma_mma_loop_op -// CHECK: %[[C:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = "f16", frag = "c", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[C:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.br ^bb1(%{{.*}}, %[[C]] : i64, !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) // CHECK: ^bb1(%{{.*}}: i64, %[[ACC:.+]]: !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>): // 2 preds: ^bb0, ^bb2 // CHECK: llvm.cond_br %{{.*}}, ^bb2, ^bb3 // CHECK: ^bb2: // pred: ^bb1 -// CHECK: %[[A:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: %[[B:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = "f16", frag = "b", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[A:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> +// CHECK: %[[B:.+]] = nvvm.wmma.load %{{.*}}, %{{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A0:.+]] = llvm.extractvalue %[[A]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A1:.+]] = llvm.extractvalue %[[A]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[A2:.+]] = llvm.extractvalue %[[A]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> @@ -154,14 +154,14 @@ // CHECK: %[[ACC1:.+]] = llvm.extractvalue %[[ACC]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[ACC2:.+]] = llvm.extractvalue %[[ACC]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[ACC3:.+]] = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: %[[ACC_MUL:.+]] = nvvm.wmma.mma %[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[B0]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[ACC0]], %[[ACC1]], %[[ACC2]], %[[ACC3]] {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} : (vector<2xf16>, {{.*}} -> !llvm.struct<(vector<2xf16>, vector<2xf16>, 
vector<2xf16>, vector<2xf16>)> +// CHECK: %[[ACC_MUL:.+]] = nvvm.wmma.mma %[[A0]], %[[A1]], %[[A2]], %[[A3]], %[[A4]], %[[A5]], %[[A6]], %[[A7]], %[[B0]], %[[B1]], %[[B2]], %[[B3]], %[[B4]], %[[B5]], %[[B6]], %[[B7]], %[[ACC0]], %[[ACC1]], %[[ACC2]], %[[ACC3]] {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2xf16>, {{.*}} -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.br ^bb1(%{{.*}}, %[[ACC_MUL]] : i64, !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>) // CHECK: ^bb3: // pred: ^bb1 // CHECK: %[[E0:.+]] = llvm.extractvalue %[[ACC]][0 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E1:.+]] = llvm.extractvalue %[[ACC]][1 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E2:.+]] = llvm.extractvalue %[[ACC]][2 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: %[[E3:.+]] = llvm.extractvalue %[[ACC]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> -// CHECK: nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> +// CHECK: nvvm.wmma.store %{{.*}}, %{{.*}}, %[[E0]], %[[E1]], %[[E2]], %[[E3]] {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16> builtin.func @gpu_wmma_mma_loop_op(%arg0: memref<128x128xf16>, %arg1: memref<128x128xf16>, %arg2: memref<128x128xf16>) { %c0 = arith.constant 0 : index @@ -233,8 +233,7 @@ // CHECK: %[[M4:.*]] = llvm.insertvalue %[[C3]], %[[M3]][3 : i32] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> // CHECK: llvm.return %[[M4]] : !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> builtin.func @gpu_wmma_elementwise(%A : !gpu.mma_matrix<16x16xf16, "COp">, %B : !gpu.mma_matrix<16x16xf16, "COp">) ->(!gpu.mma_matrix<16x16xf16, "COp">) { - %C = gpu.subgroup_mma_elementwise %A, %B { operation = "ADDF" } : - (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> + %C = gpu.subgroup_mma_elementwise addf %A, %B : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> return %C : !gpu.mma_matrix<16x16xf16, "COp"> } } diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir --- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir +++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir @@ -11,43 +11,43 @@ // CHECK: rocdl.workitem.id.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) + %tIdX = gpu.thread_id x // CHECK: rocdl.workitem.id.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index) + %tIdY = gpu.thread_id y // CHECK: rocdl.workitem.id.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index) + %tIdZ = gpu.thread_id z // CHECK: rocdl.workgroup.dim.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index) + %bDimX 
= gpu.block_dim x // CHECK: rocdl.workgroup.dim.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index) + %bDimY = gpu.block_dim y // CHECK: rocdl.workgroup.dim.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index) + %bDimZ = gpu.block_dim z // CHECK: rocdl.workgroup.id.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index) + %bIdX = gpu.block_id x // CHECK: rocdl.workgroup.id.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index) + %bIdY = gpu.block_id y // CHECK: rocdl.workgroup.id.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index) + %bIdZ = gpu.block_id z // CHECK: rocdl.grid.dim.x : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index) + %gDimX = gpu.grid_dim x // CHECK: rocdl.grid.dim.y : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) + %gDimY = gpu.grid_dim y // CHECK: rocdl.grid.dim.z : i32 // CHECK: = llvm.sext %{{.*}} : i32 to i64 - %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %gDimZ = gpu.grid_dim z std.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ, %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ diff --git a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir --- a/mlir/test/Conversion/GPUToSPIRV/builtins.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/builtins.mlir @@ -16,7 +16,7 @@ // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} - %0 = "gpu.block_id"() {dimension = "x"} : () -> index + %0 = gpu.block_id x gpu.return } } @@ -42,7 +42,7 @@ // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}1 : i32{{\]}} - %0 = "gpu.block_id"() {dimension = "y"} : () -> index + %0 = gpu.block_id y gpu.return } } @@ -66,7 +66,7 @@ // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[WORKGROUPID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}2 : i32{{\]}} - %0 = "gpu.block_id"() {dimension = "z"} : () -> index + %0 = gpu.block_id z gpu.return } } @@ -91,7 +91,7 @@ // We may want to define gpu.workgroup_size and convert it to the entry // point ABI we want here. // CHECK: spv.Constant 32 : i32 - %0 = "gpu.block_dim"() {dimension = "x"} : () -> index + %0 = gpu.block_dim x gpu.return } } @@ -113,7 +113,7 @@ attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { // The constant value is obtained from the spv.entry_point_abi. // CHECK: spv.Constant 4 : i32 - %0 = "gpu.block_dim"() {dimension = "y"} : () -> index + %0 = gpu.block_dim y gpu.return } } @@ -135,7 +135,7 @@ attributes {spv.entry_point_abi = {local_size = dense<[32, 4, 1]>: vector<3xi32>}} { // The constant value is obtained from the spv.entry_point_abi. 
// CHECK: spv.Constant 1 : i32 - %0 = "gpu.block_dim"() {dimension = "z"} : () -> index + %0 = gpu.block_dim z gpu.return } } @@ -159,7 +159,7 @@ // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[LOCALINVOCATIONID]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} - %0 = "gpu.thread_id"() {dimension = "x"} : () -> index + %0 = gpu.thread_id x gpu.return } } @@ -183,7 +183,7 @@ // CHECK: [[ADDRESS:%.*]] = spv.mlir.addressof [[NUMWORKGROUPS]] // CHECK-NEXT: [[VEC:%.*]] = spv.Load "Input" [[ADDRESS]] // CHECK-NEXT: {{%.*}} = spv.CompositeExtract [[VEC]]{{\[}}0 : i32{{\]}} - %0 = "gpu.grid_dim"() {dimension = "x"} : () -> index + %0 = gpu.grid_dim x gpu.return } } diff --git a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir --- a/mlir/test/Conversion/GPUToSPIRV/load-store.mlir +++ b/mlir/test/Conversion/GPUToSPIRV/load-store.mlir @@ -43,18 +43,18 @@ // CHECK: %[[ADDRESSLOCALINVOCATIONID:.*]] = spv.mlir.addressof @[[$LOCALINVOCATIONIDVAR]] // CHECK: %[[LOCALINVOCATIONID:.*]] = spv.Load "Input" %[[ADDRESSLOCALINVOCATIONID]] // CHECK: %[[LOCALINVOCATIONIDX:.*]] = spv.CompositeExtract %[[LOCALINVOCATIONID]]{{\[}}0 : i32{{\]}} - %0 = "gpu.block_id"() {dimension = "x"} : () -> index - %1 = "gpu.block_id"() {dimension = "y"} : () -> index - %2 = "gpu.block_id"() {dimension = "z"} : () -> index - %3 = "gpu.thread_id"() {dimension = "x"} : () -> index - %4 = "gpu.thread_id"() {dimension = "y"} : () -> index - %5 = "gpu.thread_id"() {dimension = "z"} : () -> index - %6 = "gpu.grid_dim"() {dimension = "x"} : () -> index - %7 = "gpu.grid_dim"() {dimension = "y"} : () -> index - %8 = "gpu.grid_dim"() {dimension = "z"} : () -> index - %9 = "gpu.block_dim"() {dimension = "x"} : () -> index - %10 = "gpu.block_dim"() {dimension = "y"} : () -> index - %11 = "gpu.block_dim"() {dimension = "z"} : () -> index + %0 = gpu.block_id x + %1 = gpu.block_id y + %2 = gpu.block_id z + %3 = gpu.thread_id x + %4 = gpu.thread_id y + %5 = gpu.thread_id z + %6 = gpu.grid_dim x + %7 = gpu.grid_dim y + %8 = gpu.grid_dim z + %9 = gpu.block_dim x + %10 = gpu.block_dim y + %11 = gpu.block_dim z // CHECK: %[[INDEX1:.*]] = spv.IAdd %[[ARG3]], %[[WORKGROUPIDX]] %12 = arith.addi %arg3, %0 : index // CHECK: %[[INDEX2:.*]] = spv.IAdd %[[ARG4]], %[[LOCALINVOCATIONIDX]] diff --git a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir --- a/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir +++ b/mlir/test/Conversion/VectorToGPU/vector-to-mma-ops.mlir @@ -92,7 +92,7 @@ // CHECK-DAG: %[[C0:.+]] = gpu.subgroup_mma_constant_matrix %[[CST_0]] : !gpu.mma_matrix<16x16xf16, "COp"> // CHECK-DAG: %[[C1:.+]] = gpu.subgroup_mma_constant_matrix %[[CST_1]] : !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp"> -// CHECK: %[[E:.+]] = gpu.subgroup_mma_elementwise %[[D]], %[[C1]] {operation = "ADDF"} : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK: %[[E:.+]] = gpu.subgroup_mma_elementwise addf %[[D]], %[[C1]] : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: gpu.subgroup_mma_store_matrix %[[E]], %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : 
!gpu.mma_matrix<16x16xf16, "COp">, memref<16x16xf16> func @matmul_fused_elementwise(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<16x16xf16>) { %cst_0 = arith.constant dense<0.000000e+00> : vector<16x16xf16> @@ -114,7 +114,7 @@ // CHECK-DAG: %[[C0:.+]] = gpu.subgroup_mma_constant_matrix %[[CST_0]] : !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C0]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: %[[E:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}] {leadDimension = 0 : index} : memref<16x16x16x16xf16> -> !gpu.mma_matrix<16x16xf16, "COp"> -// CHECK: %[[F:.+]] = gpu.subgroup_mma_elementwise %[[D]], %[[E]] {operation = "DIVF"} : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> +// CHECK: %[[F:.+]] = gpu.subgroup_mma_elementwise divf %[[D]], %[[E]] : (!gpu.mma_matrix<16x16xf16, "COp">, !gpu.mma_matrix<16x16xf16, "COp">) -> !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: gpu.subgroup_mma_store_matrix %[[F]], %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref<16x16xf16> func @matmul_fused_broadcast(%arg0: memref<16x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<16x16xf16>, %arg3: memref<16x16x16x16xf16>) { diff --git a/mlir/test/Dialect/GPU/all-reduce-max.mlir b/mlir/test/Dialect/GPU/all-reduce-max.mlir --- a/mlir/test/Dialect/GPU/all-reduce-max.mlir +++ b/mlir/test/Dialect/GPU/all-reduce-max.mlir @@ -16,17 +16,17 @@ // CHECK-DAG: [[VAL_8:%.*]] = arith.constant 4 : i32 // CHECK-DAG: [[VAL_9:%.*]] = arith.constant 8 : i32 // CHECK-DAG: [[VAL_10:%.*]] = arith.constant 16 : i32 - // CHECK: [[VAL_11:%.*]] = "gpu.block_dim"() {dimension = "x"} : () -> index + // CHECK: [[VAL_11:%.*]] = gpu.block_dim x // CHECK: [[VAL_12:%.*]] = arith.index_cast [[VAL_11]] : index to i32 - // CHECK: [[VAL_13:%.*]] = "gpu.block_dim"() {dimension = "y"} : () -> index + // CHECK: [[VAL_13:%.*]] = gpu.block_dim y // CHECK: [[VAL_14:%.*]] = arith.index_cast [[VAL_13]] : index to i32 - // CHECK: [[VAL_15:%.*]] = "gpu.block_dim"() {dimension = "z"} : () -> index + // CHECK: [[VAL_15:%.*]] = gpu.block_dim z // CHECK: [[VAL_16:%.*]] = arith.index_cast [[VAL_15]] : index to i32 - // CHECK: [[VAL_17:%.*]] = "gpu.thread_id"() {dimension = "x"} : () -> index + // CHECK: [[VAL_17:%.*]] = gpu.thread_id x // CHECK: [[VAL_18:%.*]] = arith.index_cast [[VAL_17]] : index to i32 - // CHECK: [[VAL_19:%.*]] = "gpu.thread_id"() {dimension = "y"} : () -> index + // CHECK: [[VAL_19:%.*]] = gpu.thread_id y // CHECK: [[VAL_20:%.*]] = arith.index_cast [[VAL_19]] : index to i32 - // CHECK: [[VAL_21:%.*]] = "gpu.thread_id"() {dimension = "z"} : () -> index + // CHECK: [[VAL_21:%.*]] = gpu.thread_id z // CHECK: [[VAL_22:%.*]] = arith.index_cast [[VAL_21]] : index to i32 // CHECK: [[VAL_23:%.*]] = arith.muli [[VAL_22]], [[VAL_14]] : i32 // CHECK: [[VAL_24:%.*]] = arith.addi [[VAL_23]], [[VAL_20]] : i32 @@ -41,7 +41,7 @@ // CHECK: [[VAL_33:%.*]] = arith.cmpi slt, [[VAL_32]], [[VAL_5]] : i32 // CHECK: cond_br [[VAL_33]], ^bb1, ^bb17 // CHECK: ^bb1: - // CHECK: [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle [[VAL_0]], [[VAL_6]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_35]], ^bb2, ^bb3 // CHECK: ^bb2: // CHECK: [[VAL_36:%.*]] = arith.cmpf ugt, [[VAL_0]], [[VAL_34]] : f32 @@ 
-50,7 +50,7 @@ // CHECK: ^bb3: // CHECK: br ^bb4([[VAL_0]] : f32) // CHECK: ^bb4([[VAL_38:%.*]]: f32): - // CHECK: [[VAL_39:%.*]], [[VAL_40:%.*]] = gpu.shuffle [[VAL_38]], [[VAL_7]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_39:%.*]], [[VAL_40:%.*]] = gpu.shuffle xor [[VAL_38]], [[VAL_7]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_40]], ^bb5, ^bb6 // CHECK: ^bb5: // CHECK: [[VAL_41:%.*]] = arith.cmpf ugt, [[VAL_38]], [[VAL_39]] : f32 @@ -59,7 +59,7 @@ // CHECK: ^bb6: // CHECK: br ^bb7([[VAL_38]] : f32) // CHECK: ^bb7([[VAL_43:%.*]]: f32): - // CHECK: [[VAL_44:%.*]], [[VAL_45:%.*]] = gpu.shuffle [[VAL_43]], [[VAL_8]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_44:%.*]], [[VAL_45:%.*]] = gpu.shuffle xor [[VAL_43]], [[VAL_8]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_45]], ^bb8, ^bb9 // CHECK: ^bb8: // CHECK: [[VAL_46:%.*]] = arith.cmpf ugt, [[VAL_43]], [[VAL_44]] : f32 @@ -68,7 +68,7 @@ // CHECK: ^bb9: // CHECK: br ^bb10([[VAL_43]] : f32) // CHECK: ^bb10([[VAL_48:%.*]]: f32): - // CHECK: [[VAL_49:%.*]], [[VAL_50:%.*]] = gpu.shuffle [[VAL_48]], [[VAL_9]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_49:%.*]], [[VAL_50:%.*]] = gpu.shuffle xor [[VAL_48]], [[VAL_9]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_50]], ^bb11, ^bb12 // CHECK: ^bb11: // CHECK: [[VAL_51:%.*]] = arith.cmpf ugt, [[VAL_48]], [[VAL_49]] : f32 @@ -77,7 +77,7 @@ // CHECK: ^bb12: // CHECK: br ^bb13([[VAL_48]] : f32) // CHECK: ^bb13([[VAL_53:%.*]]: f32): - // CHECK: [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle [[VAL_53]], [[VAL_10]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle xor [[VAL_53]], [[VAL_10]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_55]], ^bb14, ^bb15 // CHECK: ^bb14: // CHECK: [[VAL_56:%.*]] = arith.cmpf ugt, [[VAL_53]], [[VAL_54]] : f32 @@ -88,19 +88,19 @@ // CHECK: ^bb16([[VAL_58:%.*]]: f32): // CHECK: br ^bb18([[VAL_58]] : f32) // CHECK: ^bb17: - // CHECK: [[VAL_59:%.*]], [[VAL_60:%.*]] = gpu.shuffle [[VAL_0]], [[VAL_6]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_59:%.*]], [[VAL_60:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_5]] : f32 // CHECK: [[VAL_61:%.*]] = arith.cmpf ugt, [[VAL_0]], [[VAL_59]] : f32 // CHECK: [[VAL_62:%.*]] = select [[VAL_61]], [[VAL_0]], [[VAL_59]] : f32 - // CHECK: [[VAL_63:%.*]], [[VAL_64:%.*]] = gpu.shuffle [[VAL_62]], [[VAL_7]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_63:%.*]], [[VAL_64:%.*]] = gpu.shuffle xor [[VAL_62]], [[VAL_7]], [[VAL_5]] : f32 // CHECK: [[VAL_65:%.*]] = arith.cmpf ugt, [[VAL_62]], [[VAL_63]] : f32 // CHECK: [[VAL_66:%.*]] = select [[VAL_65]], [[VAL_62]], [[VAL_63]] : f32 - // CHECK: [[VAL_67:%.*]], [[VAL_68:%.*]] = gpu.shuffle [[VAL_66]], [[VAL_8]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_67:%.*]], [[VAL_68:%.*]] = gpu.shuffle xor [[VAL_66]], [[VAL_8]], [[VAL_5]] : f32 // CHECK: [[VAL_69:%.*]] = arith.cmpf ugt, [[VAL_66]], [[VAL_67]] : f32 // CHECK: [[VAL_70:%.*]] = select [[VAL_69]], [[VAL_66]], [[VAL_67]] : f32 - // CHECK: [[VAL_71:%.*]], [[VAL_72:%.*]] = gpu.shuffle [[VAL_70]], [[VAL_9]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_71:%.*]], [[VAL_72:%.*]] = gpu.shuffle xor [[VAL_70]], [[VAL_9]], [[VAL_5]] : f32 // CHECK: [[VAL_73:%.*]] = arith.cmpf ugt, [[VAL_70]], [[VAL_71]] : f32 // CHECK: [[VAL_74:%.*]] = select [[VAL_73]], [[VAL_70]], [[VAL_71]] : f32 - // CHECK: [[VAL_75:%.*]], [[VAL_76:%.*]] = gpu.shuffle [[VAL_74]], [[VAL_10]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_75:%.*]], [[VAL_76:%.*]] = gpu.shuffle xor [[VAL_74]], [[VAL_10]], [[VAL_5]] : f32 // CHECK: [[VAL_77:%.*]] = arith.cmpf ugt, [[VAL_74]], [[VAL_75]] : f32 // CHECK: 
[[VAL_78:%.*]] = select [[VAL_77]], [[VAL_74]], [[VAL_75]] : f32 // CHECK: br ^bb18([[VAL_78]] : f32) @@ -125,7 +125,7 @@ // CHECK: [[VAL_87:%.*]] = arith.cmpi slt, [[VAL_83]], [[VAL_5]] : i32 // CHECK: cond_br [[VAL_87]], ^bb23, ^bb39 // CHECK: ^bb23: - // CHECK: [[VAL_88:%.*]], [[VAL_89:%.*]] = gpu.shuffle [[VAL_86]], [[VAL_6]], [[VAL_83]] xor : f32 + // CHECK: [[VAL_88:%.*]], [[VAL_89:%.*]] = gpu.shuffle xor [[VAL_86]], [[VAL_6]], [[VAL_83]] : f32 // CHECK: cond_br [[VAL_89]], ^bb24, ^bb25 // CHECK: ^bb24: // CHECK: [[VAL_90:%.*]] = arith.cmpf ugt, [[VAL_86]], [[VAL_88]] : f32 @@ -134,7 +134,7 @@ // CHECK: ^bb25: // CHECK: br ^bb26([[VAL_86]] : f32) // CHECK: ^bb26([[VAL_92:%.*]]: f32): - // CHECK: [[VAL_93:%.*]], [[VAL_94:%.*]] = gpu.shuffle [[VAL_92]], [[VAL_7]], [[VAL_83]] xor : f32 + // CHECK: [[VAL_93:%.*]], [[VAL_94:%.*]] = gpu.shuffle xor [[VAL_92]], [[VAL_7]], [[VAL_83]] : f32 // CHECK: cond_br [[VAL_94]], ^bb27, ^bb28 // CHECK: ^bb27: // CHECK: [[VAL_95:%.*]] = arith.cmpf ugt, [[VAL_92]], [[VAL_93]] : f32 @@ -143,7 +143,7 @@ // CHECK: ^bb28: // CHECK: br ^bb29([[VAL_92]] : f32) // CHECK: ^bb29([[VAL_97:%.*]]: f32): - // CHECK: [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle [[VAL_97]], [[VAL_8]], [[VAL_83]] xor : f32 + // CHECK: [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle xor [[VAL_97]], [[VAL_8]], [[VAL_83]] : f32 // CHECK: cond_br [[VAL_99]], ^bb30, ^bb31 // CHECK: ^bb30: // CHECK: [[VAL_100:%.*]] = arith.cmpf ugt, [[VAL_97]], [[VAL_98]] : f32 @@ -152,7 +152,7 @@ // CHECK: ^bb31: // CHECK: br ^bb32([[VAL_97]] : f32) // CHECK: ^bb32([[VAL_102:%.*]]: f32): - // CHECK: [[VAL_103:%.*]], [[VAL_104:%.*]] = gpu.shuffle [[VAL_102]], [[VAL_9]], [[VAL_83]] xor : f32 + // CHECK: [[VAL_103:%.*]], [[VAL_104:%.*]] = gpu.shuffle xor [[VAL_102]], [[VAL_9]], [[VAL_83]] : f32 // CHECK: cond_br [[VAL_104]], ^bb33, ^bb34 // CHECK: ^bb33: // CHECK: [[VAL_105:%.*]] = arith.cmpf ugt, [[VAL_102]], [[VAL_103]] : f32 @@ -161,7 +161,7 @@ // CHECK: ^bb34: // CHECK: br ^bb35([[VAL_102]] : f32) // CHECK: ^bb35([[VAL_107:%.*]]: f32): - // CHECK: [[VAL_108:%.*]], [[VAL_109:%.*]] = gpu.shuffle [[VAL_107]], [[VAL_10]], [[VAL_83]] xor : f32 + // CHECK: [[VAL_108:%.*]], [[VAL_109:%.*]] = gpu.shuffle xor [[VAL_107]], [[VAL_10]], [[VAL_83]] : f32 // CHECK: cond_br [[VAL_109]], ^bb36, ^bb37 // CHECK: ^bb36: // CHECK: [[VAL_110:%.*]] = arith.cmpf ugt, [[VAL_107]], [[VAL_108]] : f32 @@ -172,19 +172,19 @@ // CHECK: ^bb38([[VAL_112:%.*]]: f32): // CHECK: br ^bb40([[VAL_112]] : f32) // CHECK: ^bb39: - // CHECK: [[VAL_113:%.*]], [[VAL_114:%.*]] = gpu.shuffle [[VAL_86]], [[VAL_6]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_113:%.*]], [[VAL_114:%.*]] = gpu.shuffle xor [[VAL_86]], [[VAL_6]], [[VAL_5]] : f32 // CHECK: [[VAL_115:%.*]] = arith.cmpf ugt, [[VAL_86]], [[VAL_113]] : f32 // CHECK: [[VAL_116:%.*]] = select [[VAL_115]], [[VAL_86]], [[VAL_113]] : f32 - // CHECK: [[VAL_117:%.*]], [[VAL_118:%.*]] = gpu.shuffle [[VAL_116]], [[VAL_7]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_117:%.*]], [[VAL_118:%.*]] = gpu.shuffle xor [[VAL_116]], [[VAL_7]], [[VAL_5]] : f32 // CHECK: [[VAL_119:%.*]] = arith.cmpf ugt, [[VAL_116]], [[VAL_117]] : f32 // CHECK: [[VAL_120:%.*]] = select [[VAL_119]], [[VAL_116]], [[VAL_117]] : f32 - // CHECK: [[VAL_121:%.*]], [[VAL_122:%.*]] = gpu.shuffle [[VAL_120]], [[VAL_8]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_121:%.*]], [[VAL_122:%.*]] = gpu.shuffle xor [[VAL_120]], [[VAL_8]], [[VAL_5]] : f32 // CHECK: [[VAL_123:%.*]] = arith.cmpf ugt, [[VAL_120]], [[VAL_121]] : f32 // CHECK: [[VAL_124:%.*]] = select 
[[VAL_123]], [[VAL_120]], [[VAL_121]] : f32 - // CHECK: [[VAL_125:%.*]], [[VAL_126:%.*]] = gpu.shuffle [[VAL_124]], [[VAL_9]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_125:%.*]], [[VAL_126:%.*]] = gpu.shuffle xor [[VAL_124]], [[VAL_9]], [[VAL_5]] : f32 // CHECK: [[VAL_127:%.*]] = arith.cmpf ugt, [[VAL_124]], [[VAL_125]] : f32 // CHECK: [[VAL_128:%.*]] = select [[VAL_127]], [[VAL_124]], [[VAL_125]] : f32 - // CHECK: [[VAL_129:%.*]], [[VAL_130:%.*]] = gpu.shuffle [[VAL_128]], [[VAL_10]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_129:%.*]], [[VAL_130:%.*]] = gpu.shuffle xor [[VAL_128]], [[VAL_10]], [[VAL_5]] : f32 // CHECK: [[VAL_131:%.*]] = arith.cmpf ugt, [[VAL_128]], [[VAL_129]] : f32 // CHECK: [[VAL_132:%.*]] = select [[VAL_131]], [[VAL_128]], [[VAL_129]] : f32 // CHECK: br ^bb40([[VAL_132]] : f32) @@ -195,7 +195,7 @@ // CHECK: br ^bb42 // CHECK: ^bb42: // CHECK: gpu.barrier - %sum = "gpu.all_reduce"(%arg0) ({}) {op = "max"} : (f32) -> (f32) + %sum = gpu.all_reduce max %arg0 {} : (f32) -> (f32) gpu.return } diff --git a/mlir/test/Dialect/GPU/all-reduce.mlir b/mlir/test/Dialect/GPU/all-reduce.mlir --- a/mlir/test/Dialect/GPU/all-reduce.mlir +++ b/mlir/test/Dialect/GPU/all-reduce.mlir @@ -16,17 +16,17 @@ // CHECK-DAG: [[VAL_8:%.*]] = arith.constant 4 : i32 // CHECK-DAG: [[VAL_9:%.*]] = arith.constant 8 : i32 // CHECK-DAG: [[VAL_10:%.*]] = arith.constant 16 : i32 - // CHECK: [[VAL_11:%.*]] = "gpu.block_dim"() {dimension = "x"} : () -> index + // CHECK: [[VAL_11:%.*]] = gpu.block_dim x // CHECK: [[VAL_12:%.*]] = arith.index_cast [[VAL_11]] : index to i32 - // CHECK: [[VAL_13:%.*]] = "gpu.block_dim"() {dimension = "y"} : () -> index + // CHECK: [[VAL_13:%.*]] = gpu.block_dim y // CHECK: [[VAL_14:%.*]] = arith.index_cast [[VAL_13]] : index to i32 - // CHECK: [[VAL_15:%.*]] = "gpu.block_dim"() {dimension = "z"} : () -> index + // CHECK: [[VAL_15:%.*]] = gpu.block_dim z // CHECK: [[VAL_16:%.*]] = arith.index_cast [[VAL_15]] : index to i32 - // CHECK: [[VAL_17:%.*]] = "gpu.thread_id"() {dimension = "x"} : () -> index + // CHECK: [[VAL_17:%.*]] = gpu.thread_id x // CHECK: [[VAL_18:%.*]] = arith.index_cast [[VAL_17]] : index to i32 - // CHECK: [[VAL_19:%.*]] = "gpu.thread_id"() {dimension = "y"} : () -> index + // CHECK: [[VAL_19:%.*]] = gpu.thread_id y // CHECK: [[VAL_20:%.*]] = arith.index_cast [[VAL_19]] : index to i32 - // CHECK: [[VAL_21:%.*]] = "gpu.thread_id"() {dimension = "z"} : () -> index + // CHECK: [[VAL_21:%.*]] = gpu.thread_id z // CHECK: [[VAL_22:%.*]] = arith.index_cast [[VAL_21]] : index to i32 // CHECK: [[VAL_23:%.*]] = arith.muli [[VAL_22]], [[VAL_14]] : i32 // CHECK: [[VAL_24:%.*]] = arith.addi [[VAL_23]], [[VAL_20]] : i32 @@ -41,7 +41,7 @@ // CHECK: [[VAL_33:%.*]] = arith.cmpi slt, [[VAL_32]], [[VAL_5]] : i32 // CHECK: cond_br [[VAL_33]], ^bb1, ^bb17 // CHECK: ^bb1: - // CHECK: [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle [[VAL_0]], [[VAL_6]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_34:%.*]], [[VAL_35:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_35]], ^bb2, ^bb3 // CHECK: ^bb2: // CHECK: [[VAL_36:%.*]] = arith.addf [[VAL_0]], [[VAL_34]] : f32 @@ -49,7 +49,7 @@ // CHECK: ^bb3: // CHECK: br ^bb4([[VAL_0]] : f32) // CHECK: ^bb4([[VAL_37:%.*]]: f32): - // CHECK: [[VAL_38:%.*]], [[VAL_39:%.*]] = gpu.shuffle [[VAL_37]], [[VAL_7]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_38:%.*]], [[VAL_39:%.*]] = gpu.shuffle xor [[VAL_37]], [[VAL_7]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_39]], ^bb5, ^bb6 // CHECK: ^bb5: // CHECK: [[VAL_40:%.*]] = arith.addf 
[[VAL_37]], [[VAL_38]] : f32 @@ -57,7 +57,7 @@ // CHECK: ^bb6: // CHECK: br ^bb7([[VAL_37]] : f32) // CHECK: ^bb7([[VAL_41:%.*]]: f32): - // CHECK: [[VAL_42:%.*]], [[VAL_43:%.*]] = gpu.shuffle [[VAL_41]], [[VAL_8]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_42:%.*]], [[VAL_43:%.*]] = gpu.shuffle xor [[VAL_41]], [[VAL_8]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_43]], ^bb8, ^bb9 // CHECK: ^bb8: // CHECK: [[VAL_44:%.*]] = arith.addf [[VAL_41]], [[VAL_42]] : f32 @@ -65,7 +65,7 @@ // CHECK: ^bb9: // CHECK: br ^bb10([[VAL_41]] : f32) // CHECK: ^bb10([[VAL_45:%.*]]: f32): - // CHECK: [[VAL_46:%.*]], [[VAL_47:%.*]] = gpu.shuffle [[VAL_45]], [[VAL_9]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_46:%.*]], [[VAL_47:%.*]] = gpu.shuffle xor [[VAL_45]], [[VAL_9]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_47]], ^bb11, ^bb12 // CHECK: ^bb11: // CHECK: [[VAL_48:%.*]] = arith.addf [[VAL_45]], [[VAL_46]] : f32 @@ -73,7 +73,7 @@ // CHECK: ^bb12: // CHECK: br ^bb13([[VAL_45]] : f32) // CHECK: ^bb13([[VAL_49:%.*]]: f32): - // CHECK: [[VAL_50:%.*]], [[VAL_51:%.*]] = gpu.shuffle [[VAL_49]], [[VAL_10]], [[VAL_32]] xor : f32 + // CHECK: [[VAL_50:%.*]], [[VAL_51:%.*]] = gpu.shuffle xor [[VAL_49]], [[VAL_10]], [[VAL_32]] : f32 // CHECK: cond_br [[VAL_51]], ^bb14, ^bb15 // CHECK: ^bb14: // CHECK: [[VAL_52:%.*]] = arith.addf [[VAL_49]], [[VAL_50]] : f32 @@ -83,15 +83,15 @@ // CHECK: ^bb16([[VAL_53:%.*]]: f32): // CHECK: br ^bb18([[VAL_53]] : f32) // CHECK: ^bb17: - // CHECK: [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle [[VAL_0]], [[VAL_6]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_54:%.*]], [[VAL_55:%.*]] = gpu.shuffle xor [[VAL_0]], [[VAL_6]], [[VAL_5]] : f32 // CHECK: [[VAL_56:%.*]] = arith.addf [[VAL_0]], [[VAL_54]] : f32 - // CHECK: [[VAL_57:%.*]], [[VAL_58:%.*]] = gpu.shuffle [[VAL_56]], [[VAL_7]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_57:%.*]], [[VAL_58:%.*]] = gpu.shuffle xor [[VAL_56]], [[VAL_7]], [[VAL_5]] : f32 // CHECK: [[VAL_59:%.*]] = arith.addf [[VAL_56]], [[VAL_57]] : f32 - // CHECK: [[VAL_60:%.*]], [[VAL_61:%.*]] = gpu.shuffle [[VAL_59]], [[VAL_8]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_60:%.*]], [[VAL_61:%.*]] = gpu.shuffle xor [[VAL_59]], [[VAL_8]], [[VAL_5]] : f32 // CHECK: [[VAL_62:%.*]] = arith.addf [[VAL_59]], [[VAL_60]] : f32 - // CHECK: [[VAL_63:%.*]], [[VAL_64:%.*]] = gpu.shuffle [[VAL_62]], [[VAL_9]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_63:%.*]], [[VAL_64:%.*]] = gpu.shuffle xor [[VAL_62]], [[VAL_9]], [[VAL_5]] : f32 // CHECK: [[VAL_65:%.*]] = arith.addf [[VAL_62]], [[VAL_63]] : f32 - // CHECK: [[VAL_66:%.*]], [[VAL_67:%.*]] = gpu.shuffle [[VAL_65]], [[VAL_10]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_66:%.*]], [[VAL_67:%.*]] = gpu.shuffle xor [[VAL_65]], [[VAL_10]], [[VAL_5]] : f32 // CHECK: [[VAL_68:%.*]] = arith.addf [[VAL_65]], [[VAL_66]] : f32 // CHECK: br ^bb18([[VAL_68]] : f32) // CHECK: ^bb18([[VAL_69:%.*]]: f32): @@ -115,7 +115,7 @@ // CHECK: [[VAL_77:%.*]] = arith.cmpi slt, [[VAL_73]], [[VAL_5]] : i32 // CHECK: cond_br [[VAL_77]], ^bb23, ^bb39 // CHECK: ^bb23: - // CHECK: [[VAL_78:%.*]], [[VAL_79:%.*]] = gpu.shuffle [[VAL_76]], [[VAL_6]], [[VAL_73]] xor : f32 + // CHECK: [[VAL_78:%.*]], [[VAL_79:%.*]] = gpu.shuffle xor [[VAL_76]], [[VAL_6]], [[VAL_73]] : f32 // CHECK: cond_br [[VAL_79]], ^bb24, ^bb25 // CHECK: ^bb24: // CHECK: [[VAL_80:%.*]] = arith.addf [[VAL_76]], [[VAL_78]] : f32 @@ -123,7 +123,7 @@ // CHECK: ^bb25: // CHECK: br ^bb26([[VAL_76]] : f32) // CHECK: ^bb26([[VAL_81:%.*]]: f32): - // CHECK: [[VAL_82:%.*]], [[VAL_83:%.*]] = gpu.shuffle [[VAL_81]], [[VAL_7]], [[VAL_73]] xor : f32 
+ // CHECK: [[VAL_82:%.*]], [[VAL_83:%.*]] = gpu.shuffle xor [[VAL_81]], [[VAL_7]], [[VAL_73]] : f32 // CHECK: cond_br [[VAL_83]], ^bb27, ^bb28 // CHECK: ^bb27: // CHECK: [[VAL_84:%.*]] = arith.addf [[VAL_81]], [[VAL_82]] : f32 @@ -131,7 +131,7 @@ // CHECK: ^bb28: // CHECK: br ^bb29([[VAL_81]] : f32) // CHECK: ^bb29([[VAL_85:%.*]]: f32): - // CHECK: [[VAL_86:%.*]], [[VAL_87:%.*]] = gpu.shuffle [[VAL_85]], [[VAL_8]], [[VAL_73]] xor : f32 + // CHECK: [[VAL_86:%.*]], [[VAL_87:%.*]] = gpu.shuffle xor [[VAL_85]], [[VAL_8]], [[VAL_73]] : f32 // CHECK: cond_br [[VAL_87]], ^bb30, ^bb31 // CHECK: ^bb30: // CHECK: [[VAL_88:%.*]] = arith.addf [[VAL_85]], [[VAL_86]] : f32 @@ -139,7 +139,7 @@ // CHECK: ^bb31: // CHECK: br ^bb32([[VAL_85]] : f32) // CHECK: ^bb32([[VAL_89:%.*]]: f32): - // CHECK: [[VAL_90:%.*]], [[VAL_91:%.*]] = gpu.shuffle [[VAL_89]], [[VAL_9]], [[VAL_73]] xor : f32 + // CHECK: [[VAL_90:%.*]], [[VAL_91:%.*]] = gpu.shuffle xor [[VAL_89]], [[VAL_9]], [[VAL_73]] : f32 // CHECK: cond_br [[VAL_91]], ^bb33, ^bb34 // CHECK: ^bb33: // CHECK: [[VAL_92:%.*]] = arith.addf [[VAL_89]], [[VAL_90]] : f32 @@ -147,7 +147,7 @@ // CHECK: ^bb34: // CHECK: br ^bb35([[VAL_89]] : f32) // CHECK: ^bb35([[VAL_93:%.*]]: f32): - // CHECK: [[VAL_94:%.*]], [[VAL_95:%.*]] = gpu.shuffle [[VAL_93]], [[VAL_10]], [[VAL_73]] xor : f32 + // CHECK: [[VAL_94:%.*]], [[VAL_95:%.*]] = gpu.shuffle xor [[VAL_93]], [[VAL_10]], [[VAL_73]] : f32 // CHECK: cond_br [[VAL_95]], ^bb36, ^bb37 // CHECK: ^bb36: // CHECK: [[VAL_96:%.*]] = arith.addf [[VAL_93]], [[VAL_94]] : f32 @@ -157,15 +157,15 @@ // CHECK: ^bb38([[VAL_97:%.*]]: f32): // CHECK: br ^bb40([[VAL_97]] : f32) // CHECK: ^bb39: - // CHECK: [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle [[VAL_76]], [[VAL_6]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_98:%.*]], [[VAL_99:%.*]] = gpu.shuffle xor [[VAL_76]], [[VAL_6]], [[VAL_5]] : f32 // CHECK: [[VAL_100:%.*]] = arith.addf [[VAL_76]], [[VAL_98]] : f32 - // CHECK: [[VAL_101:%.*]], [[VAL_102:%.*]] = gpu.shuffle [[VAL_100]], [[VAL_7]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_101:%.*]], [[VAL_102:%.*]] = gpu.shuffle xor [[VAL_100]], [[VAL_7]], [[VAL_5]] : f32 // CHECK: [[VAL_103:%.*]] = arith.addf [[VAL_100]], [[VAL_101]] : f32 - // CHECK: [[VAL_104:%.*]], [[VAL_105:%.*]] = gpu.shuffle [[VAL_103]], [[VAL_8]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_104:%.*]], [[VAL_105:%.*]] = gpu.shuffle xor [[VAL_103]], [[VAL_8]], [[VAL_5]] : f32 // CHECK: [[VAL_106:%.*]] = arith.addf [[VAL_103]], [[VAL_104]] : f32 - // CHECK: [[VAL_107:%.*]], [[VAL_108:%.*]] = gpu.shuffle [[VAL_106]], [[VAL_9]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_107:%.*]], [[VAL_108:%.*]] = gpu.shuffle xor [[VAL_106]], [[VAL_9]], [[VAL_5]] : f32 // CHECK: [[VAL_109:%.*]] = arith.addf [[VAL_106]], [[VAL_107]] : f32 - // CHECK: [[VAL_110:%.*]], [[VAL_111:%.*]] = gpu.shuffle [[VAL_109]], [[VAL_10]], [[VAL_5]] xor : f32 + // CHECK: [[VAL_110:%.*]], [[VAL_111:%.*]] = gpu.shuffle xor [[VAL_109]], [[VAL_10]], [[VAL_5]] : f32 // CHECK: [[VAL_112:%.*]] = arith.addf [[VAL_109]], [[VAL_110]] : f32 // CHECK: br ^bb40([[VAL_112]] : f32) // CHECK: ^bb40([[VAL_113:%.*]]: f32): @@ -175,7 +175,7 @@ // CHECK: br ^bb42 // CHECK: ^bb42: // CHECK: gpu.barrier - %sum = "gpu.all_reduce"(%arg0) ({}) {op = "add"} : (f32) -> (f32) + %sum = gpu.all_reduce add %arg0 {} : (f32) -> (f32) gpu.return } diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -194,42 +194,6 @@ // ----- -func @illegal_dimension() 
{ - // expected-error@+1 {{dimension "o" is invalid}} - %tIdX = "gpu.thread_id"() {dimension = "o"} : () -> (index) - - return -} - -// ----- - -func @illegal_dimension() { - // expected-error@+1 {{dimension "o" is invalid}} - %bDimX = "gpu.block_dim"() {dimension = "o"} : () -> (index) - - return -} - -// ----- - -func @illegal_dimension() { - // expected-error@+1 {{dimension "o" is invalid}} - %bIdX = "gpu.block_id"() {dimension = "o"} : () -> (index) - - return -} - -// ----- - -func @illegal_dimension() { - // expected-error@+1 {{dimension "o" is invalid}} - %gDimX = "gpu.grid_dim"() {dimension = "o"} : () -> (index) - - return -} - -// ----- - func @reduce_no_op_no_body(%arg0 : f32) { // expected-error@+1 {{expected either an op attribute or a non-empty body}} %res = "gpu.all_reduce"(%arg0) ({}) : (f32) -> (f32) @@ -243,15 +207,15 @@ %res = "gpu.all_reduce"(%arg0) ({ ^bb(%lhs : f32, %rhs : f32): "gpu.yield"(%lhs) : (f32) -> () - }) {op = "add"} : (f32) -> (f32) + }) {op = #gpu<"all_reduce_op add">} : (f32) -> (f32) return } // ----- func @reduce_invalid_op(%arg0 : f32) { - // expected-error@+1 {{attribute 'op' failed to satisfy constraint}} - %res = "gpu.all_reduce"(%arg0) ({}) {op = "foo"} : (f32) -> (f32) + // expected-error@+1 {{invalid op kind}} + %res = gpu.all_reduce foo %arg0 {} : (f32) -> (f32) return } @@ -259,7 +223,7 @@ func @reduce_invalid_op_type(%arg0 : f32) { // expected-error@+1 {{`and` accumulator is only compatible with Integer type}} - %res = "gpu.all_reduce"(%arg0) ({}) {op = "and"} : (f32) -> (f32) + %res = gpu.all_reduce and %arg0 {} : (f32) -> (f32) return } @@ -267,10 +231,10 @@ func @reduce_incorrect_region_arguments(%arg0 : f32) { // expected-error@+1 {{expected two region arguments}} - %res = "gpu.all_reduce"(%arg0) ({ + %res = gpu.all_reduce %arg0 { ^bb(%lhs : f32): "gpu.yield"(%lhs) : (f32) -> () - }) : (f32) -> (f32) + } : (f32) -> (f32) return } @@ -278,10 +242,10 @@ func @reduce_incorrect_region_arguments(%arg0 : f32) { // expected-error@+1 {{incorrect region argument type}} - %res = "gpu.all_reduce"(%arg0) ({ + %res = gpu.all_reduce %arg0 { ^bb(%lhs : f32, %rhs : i32): "gpu.yield"(%lhs) : (f32) -> () - }) : (f32) -> (f32) + } : (f32) -> (f32) return } @@ -289,10 +253,10 @@ func @reduce_incorrect_yield(%arg0 : f32) { // expected-error@+1 {{expected one gpu.yield operand}} - %res = "gpu.all_reduce"(%arg0) ({ + %res = gpu.all_reduce %arg0 { ^bb(%lhs : f32, %rhs : f32): "gpu.yield"(%lhs, %rhs) : (f32, f32) -> () - }) : (f32) -> (f32) + } : (f32) -> (f32) return } @@ -300,11 +264,11 @@ func @reduce_incorrect_yield(%arg0 : f32) { // expected-error@+1 {{incorrect gpu.yield type}} - %res = "gpu.all_reduce"(%arg0) ({ + %res = gpu.all_reduce %arg0 { ^bb(%lhs : f32, %rhs : f32): %one = arith.constant 1 : i32 "gpu.yield"(%one) : (i32) -> () - }) : (f32) -> (f32) + } : (f32) -> (f32) return } @@ -312,25 +276,27 @@ func @reduce_incorrect_yield(%arg0 : f32) { // expected-error@+1 {{expected gpu.yield op in region}} - %res = "gpu.all_reduce"(%arg0) ({ + %res = gpu.all_reduce %arg0 { ^bb(%lhs : f32, %rhs : f32): return - }) : (f32) -> (f32) + } : (f32) -> (f32) return } // ----- func @shuffle_mismatching_type(%arg0 : f32, %arg1 : i32, %arg2 : i32) { - // expected-error@+1 {{requires the same type for value operand and result}} - %shfl, %pred = "gpu.shuffle"(%arg0, %arg1, %arg2) { mode = "xor" } : (f32, i32, i32) -> (i32, i1) + // expected-error@+1 {{inferred type(s) 'f32', 'i1' are incompatible with return type(s) of operation 'i32', 'i1'}} + %shfl, %pred = 
"gpu.shuffle"(%arg0, %arg1, %arg2) { mode = #gpu<"shuffle_mode xor"> } : (f32, i32, i32) -> (i32, i1) + return } // ----- func @shuffle_unsupported_type(%arg0 : index, %arg1 : i32, %arg2 : i32) { - // expected-error@+1 {{requires value operand type to be f32 or i32}} - %shfl, %pred = gpu.shuffle %arg0, %arg1, %arg2 xor : index + // expected-error@+1 {{operand #0 must be i32 or f32}} + %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : index + return } // ----- diff --git a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir --- a/mlir/test/Dialect/GPU/multiple-all-reduce.mlir +++ b/mlir/test/Dialect/GPU/multiple-all-reduce.mlir @@ -10,9 +10,9 @@ gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1) threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1, %block_z = %c1) { %val = memref.load %data[%bx, %tx] : memref<2x6xf32> - %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32) + %reduced0 = gpu.all_reduce add %val {} : (f32) -> (f32) memref.store %reduced0, %sum[%bx] : memref<2xf32> - %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32) + %reduced1 = gpu.all_reduce mul %val {} : (f32) -> (f32) memref.store %reduced1, %mul[%bx] : memref<2xf32> gpu.terminator } diff --git a/mlir/test/Dialect/GPU/ops.mlir b/mlir/test/Dialect/GPU/ops.mlir --- a/mlir/test/Dialect/GPU/ops.mlir +++ b/mlir/test/Dialect/GPU/ops.mlir @@ -28,39 +28,39 @@ gpu.module @kernels { gpu.func @kernel_1(%arg0 : f32, %arg1 : memref) kernel { - %tIdX = "gpu.thread_id"() {dimension = "x"} : () -> (index) - %tIdY = "gpu.thread_id"() {dimension = "y"} : () -> (index) - %tIdZ = "gpu.thread_id"() {dimension = "z"} : () -> (index) + %tIdX = gpu.thread_id x + %tIdY = gpu.thread_id y + %tIdZ = gpu.thread_id z - %bDimX = "gpu.block_dim"() {dimension = "x"} : () -> (index) - %bDimY = "gpu.block_dim"() {dimension = "y"} : () -> (index) - %bDimZ = "gpu.block_dim"() {dimension = "z"} : () -> (index) + %bDimX = gpu.block_dim x + %bDimY = gpu.block_dim y + %bDimZ = gpu.block_dim z - %bIdX = "gpu.block_id"() {dimension = "x"} : () -> (index) - %bIdY = "gpu.block_id"() {dimension = "y"} : () -> (index) - %bIdZ = "gpu.block_id"() {dimension = "z"} : () -> (index) + %bIdX = gpu.block_id x + %bIdY = gpu.block_id y + %bIdZ = gpu.block_id z - %gDimX = "gpu.grid_dim"() {dimension = "x"} : () -> (index) - %gDimY = "gpu.grid_dim"() {dimension = "y"} : () -> (index) - %gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index) + %gDimX = gpu.grid_dim x + %gDimY = gpu.grid_dim y + %gDimZ = gpu.grid_dim z %sgId = gpu.subgroup_id : index %numSg = gpu.num_subgroups : index %SgSi = gpu.subgroup_size : index %one = arith.constant 1.0 : f32 - %sum = "gpu.all_reduce"(%one) ({}) {op = "add"} : (f32) -> (f32) + %sum = gpu.all_reduce add %one {} : (f32) -> (f32) %width = arith.constant 7 : i32 %offset = arith.constant 3 : i32 - // CHECK: gpu.shuffle %{{.*}}, %{{.*}}, %{{.*}} xor : f32 - %shfl, %pred = gpu.shuffle %arg0, %offset, %width xor : f32 - // CHECK: gpu.shuffle %{{.*}}, %{{.*}}, %{{.*}} up : f32 - %shfl1, %pred1 = gpu.shuffle %arg0, %offset, %width up : f32 - // CHECK: gpu.shuffle %{{.*}}, %{{.*}}, %{{.*}} down : f32 - %shfl2, %pred2 = gpu.shuffle %arg0, %offset, %width down : f32 - // CHECK: gpu.shuffle %{{.*}}, %{{.*}}, %{{.*}} idx : f32 - %shfl3, %pred3 = gpu.shuffle %arg0, %offset, %width idx : f32 + // CHECK: gpu.shuffle xor %{{.*}}, %{{.*}}, %{{.*}} : f32 + %shfl, %pred = gpu.shuffle xor %arg0, %offset, %width : f32 + // CHECK: 
gpu.shuffle up %{{.*}}, %{{.*}}, %{{.*}} : f32 + %shfl1, %pred1 = gpu.shuffle up %arg0, %offset, %width : f32 + // CHECK: gpu.shuffle down %{{.*}}, %{{.*}}, %{{.*}} : f32 + %shfl2, %pred2 = gpu.shuffle down %arg0, %offset, %width : f32 + // CHECK: gpu.shuffle idx %{{.*}}, %{{.*}}, %{{.*}} : f32 + %shfl3, %pred3 = gpu.shuffle idx %arg0, %offset, %width : f32 "gpu.barrier"() : () -> () @@ -234,10 +234,10 @@ %0 = gpu.subgroup_mma_load_matrix %wg[%i, %i] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "AOp"> // CHECK: gpu.subgroup_mma_load_matrix %[[wg]][%[[i]], %[[i]]] {leadDimension = 32 : index} : memref<32x32xf16, 3> -> !gpu.mma_matrix<16x16xf16, "AOp"> %1 = gpu.subgroup_mma_constant_matrix %cst : !gpu.mma_matrix<16x16xf32, "COp"> - // CHECK: gpu.subgroup_mma_elementwise %{{.*}}, %{{.*}} {operation = "ADDF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> - %2 = gpu.subgroup_mma_elementwise %1, %1 {operation = "ADDF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> - // CHECK: gpu.subgroup_mma_elementwise %{{.*}}, %{{.*}} {operation = "MAXF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> - %3 = gpu.subgroup_mma_elementwise %2, %1 {operation = "MAXF"} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> + // CHECK: gpu.subgroup_mma_elementwise addf %{{.*}}, %{{.*}} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> + %2 = gpu.subgroup_mma_elementwise addf %1, %1 : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> + // CHECK: gpu.subgroup_mma_elementwise maxf %{{.*}}, %{{.*}} : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> + %3 = gpu.subgroup_mma_elementwise maxf %2, %1 : (!gpu.mma_matrix<16x16xf32, "COp">, !gpu.mma_matrix<16x16xf32, "COp">) -> !gpu.mma_matrix<16x16xf32, "COp"> return } } diff --git a/mlir/test/Dialect/GPU/outlining.mlir b/mlir/test/Dialect/GPU/outlining.mlir --- a/mlir/test/Dialect/GPU/outlining.mlir +++ b/mlir/test/Dialect/GPU/outlining.mlir @@ -41,18 +41,18 @@ // CHECK-LABEL: gpu.module @launch_kernel // CHECK-NEXT: gpu.func @launch_kernel // CHECK-SAME: (%[[KERNEL_ARG0:.*]]: f32, %[[KERNEL_ARG1:.*]]: memref) -// CHECK-NEXT: %[[BID:.*]] = "gpu.block_id"() {dimension = "x"} : () -> index -// CHECK-NEXT: = "gpu.block_id"() {dimension = "y"} : () -> index -// CHECK-NEXT: = "gpu.block_id"() {dimension = "z"} : () -> index -// CHECK-NEXT: %[[TID:.*]] = "gpu.thread_id"() {dimension = "x"} : () -> index -// CHECK-NEXT: = "gpu.thread_id"() {dimension = "y"} : () -> index -// CHECK-NEXT: = "gpu.thread_id"() {dimension = "z"} : () -> index -// CHECK-NEXT: = "gpu.grid_dim"() {dimension = "x"} : () -> index -// CHECK-NEXT: = "gpu.grid_dim"() {dimension = "y"} : () -> index -// CHECK-NEXT: = "gpu.grid_dim"() {dimension = "z"} : () -> index -// CHECK-NEXT: %[[BDIM:.*]] = "gpu.block_dim"() {dimension = "x"} : () -> index -// CHECK-NEXT: = "gpu.block_dim"() {dimension = "y"} : () -> index -// CHECK-NEXT: = "gpu.block_dim"() {dimension = "z"} : () -> index +// CHECK-NEXT: %[[BID:.*]] = gpu.block_id x +// CHECK-NEXT: = gpu.block_id y +// CHECK-NEXT: = gpu.block_id z +// CHECK-NEXT: %[[TID:.*]] = gpu.thread_id x +// CHECK-NEXT: = 
gpu.thread_id y +// CHECK-NEXT: = gpu.thread_id z +// CHECK-NEXT: = gpu.grid_dim x +// CHECK-NEXT: = gpu.grid_dim y +// CHECK-NEXT: = gpu.grid_dim z +// CHECK-NEXT: %[[BDIM:.*]] = gpu.block_dim x +// CHECK-NEXT: = gpu.block_dim y +// CHECK-NEXT: = gpu.block_dim z // CHECK-NEXT: br ^[[BLOCK:.*]] // CHECK-NEXT: ^[[BLOCK]]: // CHECK-NEXT: "use"(%[[KERNEL_ARG0]]) : (f32) -> () diff --git a/mlir/test/Dialect/GPU/promotion.mlir b/mlir/test/Dialect/GPU/promotion.mlir --- a/mlir/test/Dialect/GPU/promotion.mlir +++ b/mlir/test/Dialect/GPU/promotion.mlir @@ -11,12 +11,12 @@ // CHECK-DAG: %[[c1:.*]] = arith.constant 1 // CHECK-DAG: %[[c4:.*]] = arith.constant 4 // CHECK-DAG: %[[c5:.*]] = arith.constant 5 - // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"} - // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"} - // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"} - // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"} - // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"} - // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"} + // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x + // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y + // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z + // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x + // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y + // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z // Verify that loops for the copy are emitted. We only check the number of // loops here since their bounds are produced by mapLoopToProcessorIds, @@ -65,12 +65,12 @@ // CHECK-DAG: %[[c6:.*]] = arith.constant 6 // CHECK-DAG: %[[c7:.*]] = arith.constant 7 // CHECK-DAG: %[[c8:.*]] = arith.constant 8 - // CHECK-DAG: %[[tx:.*]] = "gpu.thread_id"() {dimension = "x"} - // CHECK-DAG: %[[ty:.*]] = "gpu.thread_id"() {dimension = "y"} - // CHECK-DAG: %[[tz:.*]] = "gpu.thread_id"() {dimension = "z"} - // CHECK-DAG: %[[bdx:.*]] = "gpu.block_dim"() {dimension = "x"} - // CHECK-DAG: %[[bdy:.*]] = "gpu.block_dim"() {dimension = "y"} - // CHECK-DAG: %[[bdz:.*]] = "gpu.block_dim"() {dimension = "z"} + // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x + // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y + // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z + // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x + // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y + // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z // Verify that loops for the copy are emitted. 
// CHECK: scf.for %[[i0:.*]] = diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -495,21 +495,21 @@ func @nvvm_invalid_shfl_pred_1(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} - %0 = nvvm.shfl.sync "bfly" %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> i32 + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> i32 } // ----- func @nvvm_invalid_shfl_pred_2(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} - %0 = nvvm.shfl.sync "bfly" %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32)> + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32)> } // ----- func @nvvm_invalid_shfl_pred_3(%arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32) { // expected-error@+1 {{expected return type to be a two-element struct with i1 as the second element}} - %0 = nvvm.shfl.sync "bfly" %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i32)> + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i32)> } // ----- @@ -1042,7 +1042,7 @@ llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } @@ -1052,7 +1052,7 @@ llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } @@ -1062,7 +1062,7 @@ llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = "f16", frag = "b", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return @@ -1073,7 +1073,7 @@ llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = "f16", frag = 
"c", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return @@ -1086,7 +1086,7 @@ %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 - {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -1105,7 +1105,7 @@ %arg18: vector<2 x f16>) { // expected-error@+1 {{'nvvm.wmma.mma' op expected 20 arguments}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18 - {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> @@ -1126,7 +1126,7 @@ %arg18: vector<2 x f16>, %arg19: vector<2 x f16>) { // expected-error@+1 {{'nvvm.wmma.mma' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19 - {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>) -> !llvm.struct<(vector<2 x f16>, vector<2 x f16>, vector<2 x f16>)> llvm.return @@ -1146,7 +1146,7 @@ %arg20: f32, %arg21: f32, %arg22: f32, %arg23: f32) { // expected-error@+1 {{'nvvm.wmma.mma' op expected argument 15 to be of type 'vector<2xf16>'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 - {eltypeA = "f16", eltypeB = "f32", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, 
vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } @@ -1165,7 +1165,7 @@ %arg20: f32, %arg21: f32, %arg22: f32, %arg23: vector<2xf16>) { // expected-error@+1 {{'nvvm.wmma.mma' op expected argument 23 to be of type 'f32'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 - {eltypeA = "f16", eltypeB = "f32", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, vector<2xf16>) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return } @@ -1184,7 +1184,7 @@ %arg20: f32, %arg21: f32, %arg22: f32, %arg23: f32) { // expected-error@+1 {{'nvvm.wmma.mma' op expected destination type is a structure of 8 elements of type 'f32'}} %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23 - {eltypeA = "f16", eltypeB = "f32", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 16 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, vector<2xf16>)> llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -37,26 +37,26 @@ func @nvvm_shfl( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> i32 { - // CHECK: nvvm.shfl.sync "bfly" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i32 -> i32 - %0 = nvvm.shfl.sync "bfly" %arg0, %arg3, %arg1, %arg2 : i32 -> i32 - // CHECK: nvvm.shfl.sync "bfly" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 - %1 = nvvm.shfl.sync "bfly" %arg0, %arg4, %arg1, %arg2 : f32 -> f32 - // CHECK: nvvm.shfl.sync "up" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 - %2 = nvvm.shfl.sync "up" %arg0, %arg4, %arg1, %arg2 : f32 -> f32 - // CHECK: nvvm.shfl.sync "down" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 - %3 = nvvm.shfl.sync "down" %arg0, %arg4, %arg1, %arg2 : f32 -> f32 - // CHECK: nvvm.shfl.sync "idx" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 - %4 = nvvm.shfl.sync "idx" %arg0, %arg4, %arg1, %arg2 : f32 -> f32 + // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i32 -> i32 + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 : i32 -> i32 + // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 + %1 = nvvm.shfl.sync bfly %arg0, %arg4, %arg1, %arg2 : f32 -> f32 + // 
CHECK: nvvm.shfl.sync up %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 + %2 = nvvm.shfl.sync up %arg0, %arg4, %arg1, %arg2 : f32 -> f32 + // CHECK: nvvm.shfl.sync down %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 + %3 = nvvm.shfl.sync down %arg0, %arg4, %arg1, %arg2 : f32 -> f32 + // CHECK: nvvm.shfl.sync idx %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 -> f32 + %4 = nvvm.shfl.sync idx %arg0, %arg4, %arg1, %arg2 : f32 -> f32 llvm.return %0 : i32 } func @nvvm_shfl_pred( %arg0 : i32, %arg1 : i32, %arg2 : i32, %arg3 : i32, %arg4 : f32) -> !llvm.struct<(i32, i1)> { - // CHECK: nvvm.shfl.sync "bfly" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> - %0 = nvvm.shfl.sync "bfly" %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> - // CHECK: nvvm.shfl.sync "bfly" %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> - %1 = nvvm.shfl.sync "bfly" %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> + %0 = nvvm.shfl.sync bfly %arg0, %arg3, %arg1, %arg2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)> + // CHECK: nvvm.shfl.sync bfly %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> + %1 = nvvm.shfl.sync bfly %arg0, %arg4, %arg1, %arg2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)> llvm.return %0 : !llvm.struct<(i32, i1)> } @@ -71,14 +71,14 @@ %c0 : f32, %c1 : f32, %c2 : f32, %c3 : f32, %c4 : f32, %c5 : f32, %c6 : f32, %c7 : f32) { // CHECK: nvvm.mma.sync {{.*}} {alayout = "row", blayout = "col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> - %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout="row", blayout="col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> + %0 = nvvm.mma.sync %a0, %a1, %b0, %b1, %c0, %c1, %c2, %c3, %c4, %c5, %c6, %c7 {alayout = "row", blayout = "col"} : (vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %0 : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> } func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { - // CHECK: nvvm.wmma.load {{.*}} {eltype = "tf32", frag = "a", k = 8 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + // CHECK: nvvm.wmma.load {{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} %0 = nvvm.wmma.load %arg0, %arg1 - {eltype = "tf32", frag = "a", k = 8 : i32, layout = "row", m = 16 : i32, n = 16 : i32} + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> } @@ -87,9 +87,9 @@ %6 : i32, %7 : i32, %8 : f32, %9 : f32, %10 : f32, %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> { - // CHECK: nvvm.wmma.mma {{.*}} {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", 
layoutB = "row", m = 16 : i32, n = 16 : i32} + // CHECK: nvvm.wmma.mma {{.*}} {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} %r = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 - {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32} + {eltypeA = #nvvm.mma_type, eltypeB = #nvvm.mma_type, k = 8 : i32, layoutA = #nvvm.mma_layout, layoutB = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32) -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> llvm.return %r : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)> diff --git a/mlir/test/Dialect/Linalg/distribute-tiled-loop.mlir b/mlir/test/Dialect/Linalg/distribute-tiled-loop.mlir --- a/mlir/test/Dialect/Linalg/distribute-tiled-loop.mlir +++ b/mlir/test/Dialect/Linalg/distribute-tiled-loop.mlir @@ -25,13 +25,13 @@ // CHECK-LABEL: func @distribute_for_gpu // CHECK: %[[C64:.*]] = arith.constant 64 : index -// CHECK-DAG: %[[GPU_BLOCK_X:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[GPU_GRID_DIM_X:.*]] = "gpu.grid_dim"() {dimension = "x"} +// CHECK-DAG: %[[GPU_BLOCK_X:.*]] = gpu.block_id x +// CHECK-DAG: %[[GPU_GRID_DIM_X:.*]] = gpu.grid_dim x // CHECK-DAG: %[[LB_I:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[GPU_BLOCK_X]]] // CHECK-DAG: %[[STEP_I:.*]] = affine.apply #[[$MAP0]](){{\[}}%[[GPU_GRID_DIM_X]]] -// CHECK-DAG: %[[GPU_BLOCK_Y:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[GPU_GRID_DIM_Y:.*]] = "gpu.grid_dim"() {dimension = "y"} +// CHECK-DAG: %[[GPU_BLOCK_Y:.*]] = gpu.block_id y +// CHECK-DAG: %[[GPU_GRID_DIM_Y:.*]] = gpu.grid_dim y // CHECK-DAG: %[[LB_J:.*]] = affine.apply #[[$MAP1]](){{\[}}%[[GPU_BLOCK_Y]]] // CHECK-DAG: %[[STEP_J:.*]] = affine.apply #[[$MAP1]](){{\[}}%[[GPU_GRID_DIM_Y]]] diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir --- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir +++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir @@ -12,8 +12,8 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x // CHECK: scf.for %[[ARG3:.*]] = // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]] @@ -38,8 +38,8 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x // CHECK: %[[ITERY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[ITERX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] // CHECK: %[[INBOUNDSY:.*]] = arith.cmpi slt, %[[ITERY]], %{{.*}} @@ -70,10 +70,10 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// 
CHECK-DAG: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x +// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x // CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] // CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] @@ -99,8 +99,8 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x // CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] // CHECK: %[[INBOUNDS:.*]] = arith.cmpi slt, %[[LBX]], %{{.*}} // CHECK: scf.if %[[INBOUNDS]] @@ -128,9 +128,9 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x +// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x // CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[LBX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]] // CHECK: %[[STEPX:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSX]]] @@ -159,9 +159,9 @@ // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]*]]: memref // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]*]]: memref -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x // CHECK: %[[LBY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]] // CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]] // CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]]) @@ -186,10 +186,10 @@ -> tensor { // CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[BIDY:.*]] = "gpu.block_id"() {dimension = "y"} -// CHECK-DAG: %[[NBLOCKSY:.*]] = "gpu.grid_dim"() {dimension = "y"} -// CHECK-DAG: %[[BIDX:.*]] = "gpu.block_id"() {dimension = "x"} -// CHECK-DAG: %[[NBLOCKSX:.*]] = "gpu.grid_dim"() {dimension = "x"} +// CHECK-DAG: %[[BIDY:.*]] = gpu.block_id y +// CHECK-DAG: %[[NBLOCKSY:.*]] = gpu.grid_dim y +// CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x +// CHECK-DAG: %[[NBLOCKSX:.*]] = gpu.grid_dim x // CHECK: %[[MUL:.+]] = affine.apply #[[MULMAP]]()[%[[BIDY]], %[[C8]]] // CHECK: %[[LBY:.+]] = affine.apply #[[ADDMAP]]()[%[[MUL]], %[[C0]]] // CHECK: %[[STEPY:.+]] = affine.apply #[[MULMAP]]()[%[[NBLOCKSY]], %[[C8]]] diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir --- a/mlir/test/Dialect/OpenACC/ops.mlir +++ b/mlir/test/Dialect/OpenACC/ops.mlir @@ -376,9 +376,9 @@ acc.parallel private(%a: memref<10xf32>, %c: 
memref<10x10xf32>) firstprivate(%b: memref<10xf32>) { } acc.parallel { - } attributes {defaultAttr = "none"} + } attributes {defaultAttr = #acc<"defaultvalue none">} acc.parallel { - } attributes {defaultAttr = "present"} + } attributes {defaultAttr = #acc<"defaultvalue present">} acc.parallel { } attributes {asyncAttr} acc.parallel { @@ -441,9 +441,9 @@ // CHECK: acc.parallel private([[ARGA]]: memref<10xf32>, [[ARGC]]: memref<10x10xf32>) firstprivate([[ARGB]]: memref<10xf32>) { // CHECK-NEXT: } // CHECK: acc.parallel { -// CHECK-NEXT: } attributes {defaultAttr = "none"} +// CHECK-NEXT: } attributes {defaultAttr = #acc<"defaultvalue none">} // CHECK: acc.parallel { -// CHECK-NEXT: } attributes {defaultAttr = "present"} +// CHECK-NEXT: } attributes {defaultAttr = #acc<"defaultvalue present">} // CHECK: acc.parallel { // CHECK-NEXT: } attributes {asyncAttr} // CHECK: acc.parallel { @@ -482,11 +482,11 @@ acc.data copyin(%b: memref<10xf32>) copyout(%c: memref<10x10xf32>) present(%a: memref<10xf32>) { } acc.data present(%a : memref<10xf32>) { - } attributes { defaultAttr = "none" } + } attributes { defaultAttr = #acc<"defaultvalue none"> } acc.data present(%a : memref<10xf32>) { - } attributes { defaultAttr = "present" } + } attributes { defaultAttr = #acc<"defaultvalue present"> } acc.data { - } attributes { defaultAttr = "none" } + } attributes { defaultAttr = #acc<"defaultvalue none"> } return } @@ -519,11 +519,11 @@ // CHECK: acc.data copyin([[ARGB]] : memref<10xf32>) copyout([[ARGC]] : memref<10x10xf32>) present([[ARGA]] : memref<10xf32>) { // CHECK-NEXT: } // CHECK: acc.data present([[ARGA]] : memref<10xf32>) { -// CHECK-NEXT: } attributes {defaultAttr = "none"} +// CHECK-NEXT: } attributes {defaultAttr = #acc<"defaultvalue none">} // CHECK: acc.data present([[ARGA]] : memref<10xf32>) { -// CHECK-NEXT: } attributes {defaultAttr = "present"} +// CHECK-NEXT: } attributes {defaultAttr = #acc<"defaultvalue present">} // CHECK: acc.data { -// CHECK-NEXT: } attributes {defaultAttr = "none"} +// CHECK-NEXT: } attributes {defaultAttr = #acc<"defaultvalue none">} // ----- diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -155,7 +155,7 @@ // ----- func @order_value(%lb : index, %ub : index, %step : index) { - // expected-error @below {{attribute 'order_val' failed to satisfy constraint: OrderKind Clause}} + // expected-error @below {{invalid order kind}} omp.wsloop (%iv) : index = (%lb) to (%ub) step (%step) order(default) { omp.yield } @@ -476,7 +476,7 @@ func @omp_ordered3(%vec0 : i64) -> () { // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}} - omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} + omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} return } @@ -485,7 +485,7 @@ func @omp_ordered4(%arg1 : i32, %arg2 : i32, %arg3 : i32, %vec0 : i64) -> () { omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(0) { // expected-error @below {{ordered depend directive must be closely nested inside a worksharing-loop with ordered clause with parameter present}} - omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} + omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} omp.yield } @@ -496,7 +496,7 @@ func @omp_ordered5(%arg1 : i32, 
%arg2 : i32, %arg3 : i32, %vec0 : i64, %vec1 : i64) -> () { omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) { // expected-error @below {{number of variables in depend clause does not match number of iteration variables in the doacross loop}} - omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} + omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} omp.yield } @@ -514,7 +514,7 @@ // ----- func @omp_atomic_read2(%x: memref, %v: memref) { - // expected-error @below {{attribute 'memory_order' failed to satisfy constraint: MemoryOrderKind Clause}} + // expected-error @below {{invalid memory order kind}} omp.atomic.read %v = %x memory_order(xyz) : memref return } @@ -602,7 +602,7 @@ // ----- func @omp_atomic_write6(%addr : memref, %val : i32) { - // expected-error @below {{attribute 'memory_order' failed to satisfy constraint: MemoryOrderKind Clause}} + // expected-error @below {{invalid memory order kind}} omp.atomic.write %addr = %val memory_order(xyz) : memref, i32 return } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -59,7 +59,7 @@ // CHECK: omp.parallel num_threads(%{{.*}} : si32) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) shared(%{{.*}} : memref) copyin(%{{.*}} : memref) allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel"(%num_threads, %data_var, %data_var, %data_var, %data_var, %data_var, %data_var) ({ omp.terminator - }) {operand_segment_sizes = dense<[0,1,1,1,1,1,1,1]>: vector<8xi32>, default_val = "defshared"} : (si32, memref, memref, memref, memref, memref, memref) -> () + }) {operand_segment_sizes = dense<[0,1,1,1,1,1,1,1]>: vector<8xi32>, default_val = #omp<"clause_default defshared">} : (si32, memref, memref, memref, memref, memref, memref) -> () // CHECK: omp.barrier omp.barrier @@ -77,7 +77,7 @@ }) {operand_segment_sizes = dense<[1,1,1,1,1,1,0,0]> : vector<8xi32>} : (i1, si32, memref, memref, memref, memref) -> () omp.terminator - }) {operand_segment_sizes = dense<[1,1,1,1,1,1,1,1]> : vector<8xi32>, proc_bind_val = "spread"} : (i1, si32, memref, memref, memref, memref, memref, memref) -> () + }) {operand_segment_sizes = dense<[1,1,1,1,1,1,1,1]> : vector<8xi32>, proc_bind_val = #omp<"procbindkind spread">} : (i1, si32, memref, memref, memref, memref, memref, memref) -> () // test with multiple parameters for single variadic argument // CHECK: omp.parallel private(%{{.*}} : memref) firstprivate(%{{.*}} : memref, %{{.*}} : memref) shared(%{{.*}} : memref) copyin(%{{.*}} : memref) allocate(%{{.*}} : memref -> %{{.*}} : memref) @@ -160,28 +160,28 @@ "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,0,0,0,1,1,0,0]> : vector<10xi32>, schedule_val = "Static"} : + }) {operand_segment_sizes = dense<[1,1,1,0,0,0,1,1,0,0]> : vector<10xi32>, schedule_val = #omp<"schedulekind Static">} : (index, index, index, memref, i32) -> () // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) linear(%{{.*}} = %{{.*}} : memref, %{{.*}} = %{{.*}} : memref) schedule(static) "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,0,0,0,2,2,0,0]> : vector<10xi32>, schedule_val = "Static"} : + }) {operand_segment_sizes = dense<[1,1,1,0,0,0,2,2,0,0]> : 
vector<10xi32>, schedule_val = #omp<"schedulekind Static">} : (index, index, index, memref, memref, i32, i32) -> () // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) firstprivate(%{{.*}} : memref) lastprivate(%{{.*}} : memref) linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}) collapse(3) ordered(2) "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %data_var, %data_var, %linear_var, %chunk_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,1,1,1,1,1,0,1]> : vector<10xi32>, schedule_val = "Dynamic", collapse_val = 3, ordered_val = 2} : + }) {operand_segment_sizes = dense<[1,1,1,1,1,1,1,1,0,1]> : vector<10xi32>, schedule_val = #omp<"schedulekind Dynamic">, collapse_val = 3, ordered_val = 2} : (index, index, index, memref, memref, memref, memref, i32, i32) -> () // CHECK: omp.wsloop (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) private(%{{.*}} : memref) schedule(auto) nowait "omp.wsloop" (%lb, %ub, %step, %data_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = dense<[1,1,1,1,0,0,0,0,0,0]> : vector<10xi32>, nowait, schedule_val = "Auto"} : + }) {operand_segment_sizes = dense<[1,1,1,1,0,0,0,0,0,0]> : vector<10xi32>, nowait, schedule_val = #omp<"schedulekind Auto">} : (index, index, index, memref) -> () return @@ -467,22 +467,22 @@ omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(1) { // Only one DEPEND(SINK: vec) clause - // CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64} - omp.ordered depend_type("dependsink") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} + // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64} + omp.ordered depend_type(dependsink) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} - // CHECK: omp.ordered depend_type("dependsource") depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64} - omp.ordered depend_type("dependsource") depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} + // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}} : i64) {num_loops_val = 1 : i64} + omp.ordered depend_type(dependsource) depend_vec(%vec0 : i64) {num_loops_val = 1 : i64} omp.yield } omp.wsloop (%0) : i32 = (%arg1) to (%arg2) step (%arg3) ordered(2) { // Multiple DEPEND(SINK: vec) clauses - // CHECK: omp.ordered depend_type("dependsink") depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64} - omp.ordered depend_type("dependsink") depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64} + // CHECK: omp.ordered depend_type(dependsink) depend_vec(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : i64, i64, i64, i64) {num_loops_val = 2 : i64} + omp.ordered depend_type(dependsink) depend_vec(%vec0, %vec1, %vec2, %vec3 : i64, i64, i64, i64) {num_loops_val = 2 : i64} - // CHECK: omp.ordered depend_type("dependsource") depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64} - omp.ordered depend_type("dependsource") depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} + // CHECK: omp.ordered depend_type(dependsource) depend_vec(%{{.*}}, %{{.*}} : i64, i64) {num_loops_val = 2 : i64} + omp.ordered depend_type(dependsource) depend_vec(%vec0, %vec1 : i64, i64) {num_loops_val = 2 : i64} omp.yield } diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir --- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir +++ 
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-and.mlir
@@ -55,7 +55,7 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
              threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
     %val = memref.load %data[%bx, %tx] : memref<2x6xi32>
-    %reduced = "gpu.all_reduce"(%val) ({}) { op = "and" } : (i32) -> (i32)
+    %reduced = gpu.all_reduce and %val {} : (i32) -> (i32)
     memref.store %reduced, %sum[%bx] : memref<2xi32>
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-max.mlir
@@ -55,7 +55,7 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
     %val = memref.load %data[%bx, %tx] : memref<2x6xi32>
-    %reduced = "gpu.all_reduce"(%val) ({}) { op = "max" } : (i32) -> (i32)
+    %reduced = gpu.all_reduce max %val {} : (i32) -> (i32)
     memref.store %reduced, %sum[%bx] : memref<2xi32>
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-min.mlir
@@ -55,7 +55,7 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
     %val = memref.load %data[%bx, %tx] : memref<2x6xi32>
-    %reduced = "gpu.all_reduce"(%val) ({}) { op = "min" } : (i32) -> (i32)
+    %reduced = gpu.all_reduce min %val {} : (i32) -> (i32)
     memref.store %reduced, %sum[%bx] : memref<2xi32>
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-op.mlir
@@ -28,7 +28,7 @@
       %idx = arith.addi %tx, %t2 : index
       %t3 = arith.index_cast %idx : index to i32
       %val = arith.sitofp %t3 : i32 to f32
-      %sum = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
+      %sum = gpu.all_reduce add %val {} : (f32) -> (f32)
       memref.store %sum, %dst[%tz, %ty, %tx] : memref
       gpu.terminator
     }
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-or.mlir
@@ -55,7 +55,7 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
     %val = memref.load %data[%bx, %tx] : memref<2x6xi32>
-    %reduced = "gpu.all_reduce"(%val) ({}) { op = "or" } : (i32) -> (i32)
+    %reduced = gpu.all_reduce or %val {} : (i32) -> (i32)
     memref.store %reduced, %sum[%bx] : memref<2xi32>
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-region.mlir
@@ -20,11 +20,11 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one)
             threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) {
     %val = arith.index_cast %tx : index to i32
-    %xor = "gpu.all_reduce"(%val) ({
+    %xor = gpu.all_reduce %val {
     ^bb(%lhs : i32, %rhs : i32):
       %xor = arith.xori %lhs, %rhs : i32
       "gpu.yield"(%xor) : (i32) -> ()
-    }) : (i32) -> (i32)
+    } : (i32) -> (i32)
     %res = arith.sitofp %xor : i32 to f32
     memref.store %res, %dst[%tx] : memref
     gpu.terminator
diff --git a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
--- a/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
+++ b/mlir/test/Integration/GPU/CUDA/all-reduce-xor.mlir
@@ -55,7 +55,7 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
     %val = memref.load %data[%bx, %tx] : memref<2x6xi32>
-    %reduced = "gpu.all_reduce"(%val) ({}) { op = "xor" } : (i32) -> (i32)
+    %reduced = gpu.all_reduce xor %val {} : (i32) -> (i32)
     memref.store %reduced, %sum[%bx] : memref<2xi32>
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
--- a/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
+++ b/mlir/test/Integration/GPU/CUDA/multiple-all-reduce.mlir
@@ -58,9 +58,9 @@
   gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c2, %grid_y = %c1, %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c6, %block_y = %c1, %block_z = %c1) {
     %val = memref.load %data[%bx, %tx] : memref<2x6xf32>
-    %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
+    %reduced0 = gpu.all_reduce add %val {} : (f32) -> (f32)
     memref.store %reduced0, %sum[%bx] : memref<2xf32>
-    %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32)
+    %reduced1 = gpu.all_reduce mul %val {} : (f32) -> (f32)
     memref.store %reduced1, %mul[%bx] : memref<2xf32>
     gpu.terminator
   }
diff --git a/mlir/test/Integration/GPU/CUDA/shuffle.mlir b/mlir/test/Integration/GPU/CUDA/shuffle.mlir
--- a/mlir/test/Integration/GPU/CUDA/shuffle.mlir
+++ b/mlir/test/Integration/GPU/CUDA/shuffle.mlir
@@ -23,7 +23,7 @@
       %val = arith.sitofp %t0 : i32 to f32
      %width = arith.index_cast %block_x : index to i32
       %offset = arith.constant 4 : i32
-      %shfl, %valid = gpu.shuffle %val, %offset, %width xor : f32
+      %shfl, %valid = gpu.shuffle xor %val, %offset, %width : f32
       cond_br %valid, ^bb1(%shfl : f32), ^bb0
     ^bb0:
       %m1 = arith.constant -1.0 : f32
diff --git a/mlir/test/Integration/GPU/ROCM/printf.mlir b/mlir/test/Integration/GPU/ROCM/printf.mlir
--- a/mlir/test/Integration/GPU/ROCM/printf.mlir
+++ b/mlir/test/Integration/GPU/ROCM/printf.mlir
@@ -12,7 +12,7 @@
 module attributes {gpu.container_module} {
     gpu.module @kernels {
         gpu.func @hello() kernel {
-            %0 = "gpu.thread_id"() {dimension="x"} : () -> (index)
+            %0 = gpu.thread_id x
            gpu.printf "Hello from %d\n" %0 : index
            gpu.return
        }
diff --git a/mlir/test/Target/LLVMIR/nvvmir.mlir b/mlir/test/Target/LLVMIR/nvvmir.mlir
--- a/mlir/test/Target/LLVMIR/nvvmir.mlir
+++ b/mlir/test/Target/LLVMIR/nvvmir.mlir
@@ -42,21 +42,21 @@
     %0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : f32) -> i32 {
   // CHECK: call i32 @llvm.nvvm.shfl.sync.bfly.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %6 = nvvm.shfl.sync "bfly" %0, %3, %1, %2 : i32 -> i32
+  %6 = nvvm.shfl.sync bfly %0, %3, %1, %2 : i32 -> i32
   // CHECK: call float @llvm.nvvm.shfl.sync.bfly.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %7 = nvvm.shfl.sync "bfly" %0, %4, %1, %2 : f32 -> f32
+  %7 = nvvm.shfl.sync bfly %0, %4, %1, %2 : f32 -> f32
   // CHECK: call i32 @llvm.nvvm.shfl.sync.up.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %8 = nvvm.shfl.sync "up" %0, %3, %1, %2 : i32 -> i32
+  %8 = nvvm.shfl.sync up %0, %3, %1, %2 : i32 -> i32
   // CHECK: call float @llvm.nvvm.shfl.sync.up.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %9 = nvvm.shfl.sync "up" %0, %4, %1, %2 : f32 -> f32
+  %9 = nvvm.shfl.sync up %0, %4, %1, %2 : f32 -> f32
   // CHECK: call i32 @llvm.nvvm.shfl.sync.down.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %10 = nvvm.shfl.sync "down" %0, %3, %1, %2 : i32 -> i32
+  %10 = nvvm.shfl.sync down %0, %3, %1, %2 : i32 -> i32
   // CHECK: call float @llvm.nvvm.shfl.sync.down.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %11 = nvvm.shfl.sync "down" %0, %4, %1, %2 : f32 -> f32
+  %11 = nvvm.shfl.sync down %0, %4, %1, %2 : f32 -> f32
   // CHECK: call i32 @llvm.nvvm.shfl.sync.idx.i32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %12 = nvvm.shfl.sync "idx" %0, %3, %1, %2 : i32 -> i32
+  %12 = nvvm.shfl.sync idx %0, %3, %1, %2 : i32 -> i32
   // CHECK: call float @llvm.nvvm.shfl.sync.idx.f32(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %13 = nvvm.shfl.sync "idx" %0, %4, %1, %2 : f32 -> f32
+  %13 = nvvm.shfl.sync idx %0, %4, %1, %2 : f32 -> f32
   llvm.return %6 : i32
 }
@@ -64,21 +64,21 @@
     %0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : f32) -> !llvm.struct<(i32, i1)> {
   // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.bfly.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %6 = nvvm.shfl.sync "bfly" %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
+  %6 = nvvm.shfl.sync bfly %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
   // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.bfly.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %7 = nvvm.shfl.sync "bfly" %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+  %7 = nvvm.shfl.sync bfly %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
   // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.up.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %8 = nvvm.shfl.sync "up" %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
+  %8 = nvvm.shfl.sync up %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
   // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.up.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %9 = nvvm.shfl.sync "up" %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+  %9 = nvvm.shfl.sync up %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
   // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.down.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %10 = nvvm.shfl.sync "down" %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
+  %10 = nvvm.shfl.sync down %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
   // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.down.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %11 = nvvm.shfl.sync "down" %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+  %11 = nvvm.shfl.sync down %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
   // CHECK: call { i32, i1 } @llvm.nvvm.shfl.sync.idx.i32p(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %12 = nvvm.shfl.sync "idx" %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
+  %12 = nvvm.shfl.sync idx %0, %3, %1, %2 {return_value_and_is_valid} : i32 -> !llvm.struct<(i32, i1)>
   // CHECK: call { float, i1 } @llvm.nvvm.shfl.sync.idx.f32p(i32 %{{.*}}, float %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  %13 = nvvm.shfl.sync "idx" %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+  %13 = nvvm.shfl.sync idx %0, %4, %1, %2 {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
   llvm.return %6 : !llvm.struct<(i32, i1)>
 }
@@ -102,7 +102,7 @@
 llvm.func @gpu_wmma_load_op(%arg0: !llvm.ptr, %arg1: i32) {
   // CHECK: call { <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16.p3i32(i32 addrspace(3)* %{{.*}}, i32 %{{.*}})
   %0 = nvvm.wmma.load %arg0, %arg1
-    {eltype = "f16", frag = "a", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32}
+    {eltype = #nvvm.mma_type<f16>, frag = #nvvm.mma_frag<a>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}
     : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)>
   llvm.return
@@ -115,7 +115,7 @@
                          %arg4: vector<2 xf16>, %arg5: vector<2 x f16>) {
   // CHECK: call void @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16.p3i32(i32 addrspace(3)* %{{.*}}, <2 x half> {{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, <2 x half> %{{.*}}, i32 %{{.*}})
   nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5
-    {eltype = "f16", k = 16 : i32, layout = "row", m = 16 : i32, n = 16 : i32}
+    {eltype = #nvvm.mma_type<f16>, k = 16 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}
     : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>
   llvm.return
 }
@@ -134,7 +134,7 @@
                         %arg18: vector<2 x f16>, %arg19: vector<2 x f16>) {
   // CHECK: call { <2 x half>, <2 x half>, <2 x half>, <2 x half> } @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16(<2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}}, <2 x half> {{.*}})
   %0 = nvvm.wmma.mma %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19
-    {eltypeA = "f16", eltypeB = "f16", k = 16 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32}
+    {eltypeA = #nvvm.mma_type<f16>, eltypeB = #nvvm.mma_type<f16>, k = 16 : i32, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}
     : (vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>,
        vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>,
       vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>,
@@ -146,7 +146,7 @@
 llvm.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) {
   // CHECK: call { i32, i32, i32, i32 } @llvm.nvvm.wmma.m16n16k8.load.a.row.stride.tf32.p0i32(i32* %{{.*}}, i32 %{{.*}})
   %0 = nvvm.wmma.load %arg0, %arg1
-    {eltype = "tf32", frag = "a", k = 8 : i32, layout = "row", m = 16 : i32, n = 16 : i32}
+    {eltype = #nvvm.mma_type<tf32>, frag = #nvvm.mma_frag<a>, k = 8 : i32, layout = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}
     : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)>
   llvm.return
 }
@@ -156,7 +156,7 @@
                         %11 : f32, %12 : f32, %13 : f32, %14 : f32, %15 : f32) {
   // CHECK: { float, float, float, float, float, float, float, float } @llvm.nvvm.wmma.m16n16k8.mma.row.row.tf32(i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, i32 %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
   %r = nvvm.wmma.mma %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15
-    {eltypeA = "tf32", eltypeB = "f32", k = 8 : i32, layoutA = "row", layoutB = "row", m = 16 : i32, n = 16 : i32}
+    {eltypeA = #nvvm.mma_type<tf32>, eltypeB = #nvvm.mma_type<f32>, k = 8 : i32, layoutA = #nvvm.mma_layout<row>, layoutB = #nvvm.mma_layout<row>, m = 16 : i32, n = 16 : i32}
     : (i32, i32, i32, i32, i32, i32, i32, i32, f32, f32, f32, f32, f32, f32, f32, f32)
     -> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
   llvm.return
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -663,14 +663,14 @@
   // CHECK: [[TMP2:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR]], i64 0, i64 0
   // CHECK: [[OMP_THREAD2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]])
   // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB3]], i32 [[OMP_THREAD2]], i64* [[TMP2]])
-  omp.ordered depend_type("dependsink") depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
+  omp.ordered depend_type(dependsink) depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
   // CHECK: [[TMP3:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0
   // CHECK: store i64 [[ARG0]], i64* [[TMP3]], align 8
   // CHECK: [[TMP4:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[ADDR3]], i64 0, i64 0
   // CHECK: [[OMP_THREAD4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]])
   // CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB5]], i32 [[OMP_THREAD4]], i64* [[TMP4]])
-  omp.ordered depend_type("dependsource") depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
+  omp.ordered depend_type(dependsource) depend_vec(%arg3 : i64) {num_loops_val = 1 : i64}
   omp.yield
 }
@@ -690,7 +690,7 @@
   // CHECK: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR7]], i64 0, i64 0
   // CHECK: [[OMP_THREAD8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]])
   // CHECK: call void @__kmpc_doacross_wait(%struct.ident_t* @[[GLOB7]], i32 [[OMP_THREAD8]], i64* [[TMP10]])
-  omp.ordered depend_type("dependsink") depend_vec(%arg3, %arg4, %arg5, %arg6 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
+  omp.ordered depend_type(dependsink) depend_vec(%arg3, %arg4, %arg5, %arg6 : i64, i64, i64, i64) {num_loops_val = 2 : i64}
   // CHECK: [[TMP11:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0
   // CHECK: store i64 [[ARG0]], i64* [[TMP11]], align 8
@@ -699,7 +699,7 @@
   // CHECK: [[TMP13:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[ADDR9]], i64 0, i64 0
   // CHECK: [[OMP_THREAD10:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9:[0-9]+]])
   // CHECK: call void @__kmpc_doacross_post(%struct.ident_t* @[[GLOB9]], i32 [[OMP_THREAD10]], i64* [[TMP13]])
-  omp.ordered depend_type("dependsource") depend_vec(%arg3, %arg4 : i64, i64) {num_loops_val = 2 : i64}
+  omp.ordered depend_type(dependsource) depend_vec(%arg3, %arg4 : i64, i64) {num_loops_val = 2 : i64}
   omp.yield
 }
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp
@@ -19,21 +19,20 @@
 using namespace mlir;
 using namespace mlir::linalg;
-template <char dim>
+template <gpu::Dimension Dim>
 static linalg::ProcInfo getGpuBlockInfo(OpBuilder &b, Location loc) {
-  std::string d(1, dim);
-  StringAttr attr = b.getStringAttr(d);
   Type indexType = b.getIndexType();
-  ProcInfo procInfo = {b.create<gpu::BlockIdOp>(loc, indexType, attr),
-                       b.create<gpu::GridDimOp>(loc, indexType, attr)};
+  ProcInfo procInfo = {b.create<gpu::BlockIdOp>(loc, indexType, Dim),
+                       b.create<gpu::GridDimOp>(loc, indexType, Dim)};
   return procInfo;
 }
 static LinalgLoopDistributionOptions getDistributionOptions() {
   LinalgLoopDistributionOptions opts;
-  opts.procInfoMap.insert(std::make_pair("block_x", getGpuBlockInfo<'x'>));
-  opts.procInfoMap.insert(std::make_pair("block_y", getGpuBlockInfo<'y'>));
+  opts.procInfoMap.insert(
+      std::make_pair("block_x", getGpuBlockInfo<gpu::Dimension::x>));
+  opts.procInfoMap.insert(
+      std::make_pair("block_y", getGpuBlockInfo<gpu::Dimension::y>));
   return opts;
 }
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -370,12 +370,11 @@
 getGpuProcIds(OpBuilder &b, Location loc, ArrayRef<Range> parallelLoopRanges) {
   size_t count = std::min<size_t>(3, parallelLoopRanges.size());
   SmallVector<ProcInfo, 2> procInfo(count);
-  const char *xyz[] = {"x", "y", "z"};
   Type indexType = b.getIndexType();
   for (unsigned i = 0; i < count; ++i) {
-    procInfo[count - 1 - i] = {
-        b.create<gpu::BlockIdOp>(loc, indexType, b.getStringAttr(xyz[i])),
-        b.create<gpu::GridDimOp>(loc, indexType, b.getStringAttr(xyz[i]))};
+    gpu::Dimension dim = *gpu::symbolizeDimension(i);
+    procInfo[count - 1 - i] = {b.create<gpu::BlockIdOp>(loc, indexType, dim),
+                               b.create<gpu::GridDimOp>(loc, indexType, dim)};
   }
   return procInfo;
 }
diff --git a/mlir/test/mlir-tblgen/directive-common.td b/mlir/test/mlir-tblgen/directive-common.td
--- a/mlir/test/mlir-tblgen/directive-common.td
+++ b/mlir/test/mlir-tblgen/directive-common.td
@@ -1,4 +1,4 @@
-// RUN: mlir-tblgen -gen-directive-decl -I %S/../../../llvm/include %s | FileCheck -match-full-lines %s
+// RUN: mlir-tblgen -gen-directive-decl -directives-dialect=TDL -I %S/../../../llvm/include %s | FileCheck -match-full-lines %s
 include "llvm/Frontend/Directive/DirectiveBase.td"
@@ -21,11 +21,12 @@
   ];
 }
-// CHECK: def AKindvala : StrEnumAttrCase<"vala">;
-// CHECK: def AKindvalb : StrEnumAttrCase<"valb">;
-// CHECK: def AKind: StrEnumAttr<
+// CHECK: def AKindvala : I32EnumAttrCase<"vala", 0>;
+// CHECK: def AKindvalb : I32EnumAttrCase<"valb", 1>;
+// CHECK: def AKind: I32EnumAttr<
 // CHECK:   "ClauseAKind",
 // CHECK:   "AKind Clause",
 // CHECK:   [AKindvala,AKindvalb]> {
 // CHECK:     let cppNamespace = "::mlir::tdl";
 // CHECK: }
+// CHECK: def AKindAttr : EnumAttr<TDL_Dialect, AKind, "akind">;
diff --git a/mlir/test/mlir-vulkan-runner/addf.mlir b/mlir/test/mlir-vulkan-runner/addf.mlir
--- a/mlir/test/mlir-vulkan-runner/addf.mlir
+++ b/mlir/test/mlir-vulkan-runner/addf.mlir
@@ -9,7 +9,7 @@
   gpu.module @kernels {
     gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
       kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
-      %0 = "gpu.block_id"() {dimension = "x"} : () -> index
+      %0 = gpu.block_id x
       %1 = memref.load %arg0[%0] : memref<8xf32>
       %2 = memref.load %arg1[%0] : memref<8xf32>
       %3 = arith.addf %1, %2 : f32
diff --git a/mlir/test/mlir-vulkan-runner/addi.mlir b/mlir/test/mlir-vulkan-runner/addi.mlir
--- a/mlir/test/mlir-vulkan-runner/addi.mlir
+++ b/mlir/test/mlir-vulkan-runner/addi.mlir
@@ -9,9 +9,9 @@
   gpu.module @kernels {
     gpu.func @kernel_addi(%arg0 : memref<8xi32>, %arg1 : memref<8x8xi32>, %arg2 : memref<8x8x8xi32>)
      kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
-      %x = "gpu.block_id"() {dimension = "x"} : () -> index
-      %y = "gpu.block_id"() {dimension = "y"} : () -> index
-      %z = "gpu.block_id"() {dimension = "z"} : () -> index
+      %x = gpu.block_id x
+      %y = gpu.block_id y
+      %z = gpu.block_id z
       %0 = memref.load %arg0[%x] : memref<8xi32>
       %1 = memref.load %arg1[%y, %x] : memref<8x8xi32>
       %2 = arith.addi %0, %1 : i32
diff --git a/mlir/test/mlir-vulkan-runner/addi8.mlir b/mlir/test/mlir-vulkan-runner/addi8.mlir
--- a/mlir/test/mlir-vulkan-runner/addi8.mlir
+++ b/mlir/test/mlir-vulkan-runner/addi8.mlir
@@ -9,9 +9,9 @@
   gpu.module @kernels {
     gpu.func @kernel_addi(%arg0 : memref<8xi8>, %arg1 : memref<8x8xi8>, %arg2 : memref<8x8x8xi32>)
      kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>}} {
-      %x = "gpu.block_id"() {dimension = "x"} : () -> index
-      %y = "gpu.block_id"() {dimension = "y"} : () -> index
-      %z = "gpu.block_id"() {dimension = "z"} : () -> index
+      %x = gpu.block_id x
+      %y = gpu.block_id y
+      %z = gpu.block_id z
       %0 = memref.load %arg0[%x] : memref<8xi8>
       %1 = memref.load %arg1[%y, %x] : memref<8x8xi8>
       %2 = arith.addi %0, %1 : i8
diff --git a/mlir/test/mlir-vulkan-runner/mulf.mlir b/mlir/test/mlir-vulkan-runner/mulf.mlir
--- a/mlir/test/mlir-vulkan-runner/mulf.mlir
+++ b/mlir/test/mlir-vulkan-runner/mulf.mlir
@@ -9,8 +9,8 @@
   gpu.module @kernels {
     gpu.func @kernel_mul(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>)
      kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
-      %x = "gpu.block_id"() {dimension = "x"} : () -> index
-      %y = "gpu.block_id"() {dimension = "y"} : () -> index
+      %x = gpu.block_id x
+      %y = gpu.block_id y
       %1 = memref.load %arg0[%x, %y] : memref<4x4xf32>
       %2 = memref.load %arg1[%x, %y] : memref<4x4xf32>
       %3 = arith.mulf %1, %2 : f32
diff --git a/mlir/test/mlir-vulkan-runner/subf.mlir b/mlir/test/mlir-vulkan-runner/subf.mlir
--- a/mlir/test/mlir-vulkan-runner/subf.mlir
+++ b/mlir/test/mlir-vulkan-runner/subf.mlir
@@ -9,9 +9,9 @@
   gpu.module @kernels {
     gpu.func @kernel_sub(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>)
      kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
-      %x = "gpu.block_id"() {dimension = "x"} : () -> index
-      %y = "gpu.block_id"() {dimension = "y"} : () -> index
-      %z = "gpu.block_id"() {dimension = "z"} : () -> index
+      %x = gpu.block_id x
+      %y = gpu.block_id y
+      %z = gpu.block_id z
       %1 = memref.load %arg0[%x, %y, %z] : memref<8x4x4xf32>
       %2 = memref.load %arg1[%y, %z] : memref<4x4xf32>
       %3 = arith.subf %1, %2 : f32
diff --git a/mlir/test/mlir-vulkan-runner/time.mlir b/mlir/test/mlir-vulkan-runner/time.mlir
--- a/mlir/test/mlir-vulkan-runner/time.mlir
+++ b/mlir/test/mlir-vulkan-runner/time.mlir
@@ -12,8 +12,8 @@
   gpu.module @kernels {
     gpu.func @kernel_add(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>)
      kernel attributes { spv.entry_point_abi = {local_size = dense<[128, 1, 1]>: vector<3xi32> }} {
-      %bid = "gpu.block_id"() {dimension = "x"} : () -> index
-      %tid = "gpu.thread_id"() {dimension = "x"} : () -> index
+      %bid = gpu.block_id x
+      %tid = gpu.thread_id x
       %cst = arith.constant 128 : index
       %b = arith.muli %bid, %cst : index
       %0 = arith.addi %b, %tid : index
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
--- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp
@@ -731,7 +731,7 @@
     if (parseResult.hasValue())
       return attr;
   }
-  parser.emitError(typeLoc) << "unknown attribute `"
+  parser.emitError(typeLoc) << "unknown attribute `"
      << attrTag << "` in dialect `" << getNamespace() << "`";
   return {{};
 }
diff --git a/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp
--- a/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp
+++ b/mlir/tools/mlir-tblgen/DirectiveCommonGen.cpp
@@ -14,8 +14,10 @@
 #include "mlir/TableGen/GenInfo.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/DirectiveEmitter.h"
+#include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
 using llvm::Clause;
@@ -39,7 +41,14 @@
 // Clause record in OMP.td. This name can be used to specify the type of the
 // OpenMP operation's operand. The allowedClauseValues field provides the list
 // of ClauseValues which are part of the enumeration.
-static bool emitDecls(const RecordKeeper &recordKeeper, raw_ostream &os) {
+static bool emitDecls(const RecordKeeper &recordKeeper, llvm::StringRef dialect,
+                      raw_ostream &os) {
+  // A dialect must be selected for the generated attributes.
+  if (dialect.empty()) {
+    llvm::PrintFatalError("a dialect must be selected for the directives via "
+                          "'--directives-dialect'");
+  }
+
   const auto &directiveLanguages =
       recordKeeper.getAllDerivedDefinitions("DirectiveLanguage");
   assert(!directiveLanguages.empty() && "DirectiveLanguage missing.");
@@ -56,18 +65,19 @@
     assert(!enumName.empty() && "enumClauseValue field not set.");
     std::vector<std::string> cvDefs;
-    for (const auto &cv : clauseVals) {
-      ClauseVal cval{cv};
+    for (const auto &it : llvm::enumerate(clauseVals)) {
+      ClauseVal cval{it.value()};
       if (!cval.isUserVisible())
         continue;
       const auto name = cval.getFormattedName();
       std::string cvDef{(enumName + llvm::Twine(name)).str()};
-      os << "def " << cvDef << " : StrEnumAttrCase<\"" << name << "\">;\n";
+      os << "def " << cvDef << " : I32EnumAttrCase<\"" << name << "\", "
+         << it.index() << ">;\n";
       cvDefs.push_back(cvDef);
     }
-    os << "def " << enumName << ": StrEnumAttr<\n";
+    os << "def " << enumName << ": I32EnumAttr<\n";
     os << "  \"Clause" << enumName << "\",\n";
     os << "  \"" << enumName << " Clause\",\n";
     os << "  [";
@@ -79,15 +89,27 @@
     os << "]> {\n";
     os << "    let cppNamespace = \"::mlir::"
        << directiveLanguages[0]->getValueAsString("cppNamespace") << "\";\n";
+    os << "    let genSpecializedAttr = 0;\n";
     os << "}\n";
+    llvm::SmallString<16> mnemonic;
+    llvm::transform(enumName, std::back_inserter(mnemonic), llvm::toLower);
+    os << "def " << enumName << "Attr : EnumAttr<" << dialect << "_Dialect, "
+       << enumName << ", \"" << mnemonic << "\">;\n";
   }
   return false;
 }
+static llvm::cl::OptionCategory
+    directiveGenCat("Options for gen-directive-decl");
+static llvm::cl::opt<std::string>
+    dialect("directives-dialect",
+            llvm::cl::desc("Generate directives for this dialect"),
+            llvm::cl::cat(directiveGenCat), llvm::cl::CommaSeparated);
+
 // Registers the generator to mlir-tblgen.
 static mlir::GenRegistration genDirectiveDecls(
     "gen-directive-decl",
     "Generate declarations for directives (OpenMP/OpenACC etc.)",
     [](const RecordKeeper &records, raw_ostream &os) {
-      return emitDecls(records, os);
+      return emitDecls(records, dialect, os);
     });
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -424,6 +424,14 @@
     Optional<StringRef> variableTransformer;
   };
+  /// The context in which an element is generated.
+  enum class GenContext {
+    /// The element is generated at the top-level or with the same behaviour.
+    Normal,
+    /// The element is generated inside an optional group.
+    Optional
+  };
+
   OperationFormat(const Operator &op)
       : allOperands(false), allOperandTypes(false), allResultTypes(false),
         infersResultTypes(false) {
@@ -442,7 +450,8 @@
   void genParser(Operator &op, OpClass &opClass);
   /// Generate the parser code for a specific format element.
   void genElementParser(Element *element, MethodBody &body,
-                        FmtContext &attrTypeCtx);
+                        FmtContext &attrTypeCtx,
+                        GenContext genCtx = GenContext::Normal);
   /// Generate the C++ to resolve the types of operands and results during
   /// parsing.
   void genParserTypeResolution(Operator &op, MethodBody &body);
@@ -1217,7 +1226,8 @@
 }
 void OperationFormat::genElementParser(Element *element, MethodBody &body,
-                                       FmtContext &attrTypeCtx) {
+                                       FmtContext &attrTypeCtx,
+                                       GenContext genCtx) {
   /// Optional Group.
   if (auto *optional = dyn_cast<OptionalElement>(element)) {
     auto elements = llvm::drop_begin(optional->getThenElements(),
@@ -1264,10 +1274,13 @@
            << "\", parser.getBuilder().getUnitAttr());\n";
     }
-    // Generate the rest of the elements normally.
+    // Generate the rest of the elements inside an optional group. Elements in
+    // an optional group after the guard are parsed as required.
     for (Element &childElement : llvm::drop_begin(elements, 1)) {
-      if (&childElement != elidedAnchorElement)
-        genElementParser(&childElement, body, attrTypeCtx);
+      if (&childElement != elidedAnchorElement) {
+        genElementParser(&childElement, body, attrTypeCtx,
+                         GenContext::Optional);
+      }
     }
     body << "  }";
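[Editorial note, not part of the patch] The GenContext distinction introduced above matters for declarative assembly formats that anchor an optional attribute inside an optional group, for example a format along the lines of (`ordered` `(` $ordered_val^ `)`)?. A hypothetical illustration at the IR level; the op name test.loop and its clause are made up for orientation only:

  test.loop               // accepted: the whole optional group is omitted
  test.loop ordered(2)    // accepted: once `ordered` is parsed, the anchored attribute is parsed as required
  // test.loop ordered()  // rejected: inside the group the attribute no longer goes through the optional-attribute parser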
@@ -1316,7 +1329,7 @@
     } else {
       attrTypeStr = "::mlir::Type{}";
     }
-    if (var->attr.isOptional()) {
+    if (genCtx == GenContext::Normal && var->attr.isOptional()) {
       body << formatv(optionalAttrParserCode, var->name, attrTypeStr);
     } else {
       if (attr->shouldBeQualified() ||
@@ -2048,9 +2061,6 @@
     if (attr->getTypeBuilder())
       body << "  _odsPrinter.printAttributeWithoutType(" << op.getGetterName(var->name)
            << "Attr());\n";
-    else if (var->attr.isOptional())
-      body << "_odsPrinter.printAttribute(" << op.getGetterName(var->name)
-           << "Attr());\n";
     else if (attr->shouldBeQualified() ||
             var->attr.getStorageType() == "::mlir::Attribute")
       body << "  _odsPrinter.printAttribute(" << op.getGetterName(var->name)
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -3002,6 +3002,14 @@
             ["-gen-enum-defs"],
             "include/mlir/Dialect/GPU/GPUOpsEnums.cpp.inc",
         ),
+        (
+            ["-gen-attrdef-decls"],
+            "include/mlir/Dialect/GPU/GPUOpsAttributes.h.inc",
+        ),
+        (
+            ["-gen-attrdef-defs"],
+            "include/mlir/Dialect/GPU/GPUOpsAttributes.cpp.inc",
+        ),
     ],
     tblgen = ":mlir-tblgen",
     td_file = "include/mlir/Dialect/GPU/GPUOps.td",
@@ -3541,6 +3549,20 @@
             ["-gen-enum-defs"],
             "include/mlir/Dialect/LLVMIR/NVVMOpsEnums.cpp.inc",
         ),
+        (
+            [
+                "-gen-attrdef-decls",
+                "-attrdefs-dialect=nvvm",
+            ],
+            "include/mlir/Dialect/LLVMIR/NVVMOpsAttributes.h.inc",
+        ),
+        (
+            [
+                "-gen-attrdef-defs",
+                "-attrdefs-dialect=nvvm",
+            ],
+            "include/mlir/Dialect/LLVMIR/NVVMOpsAttributes.cpp.inc",
+        ),
     ],
     tblgen = ":mlir-tblgen",
     td_file = "include/mlir/Dialect/LLVMIR/NVVMOps.td",
@@ -5884,7 +5906,10 @@
     strip_include_prefix = "include",
     tbl_outs = [
         (
-            ["-gen-directive-decl"],
+            [
+                "-gen-directive-decl",
+                "-directives-dialect=OpenACC",
+            ],
             "include/mlir/Dialect/OpenACC/AccCommon.td",
         ),
     ],
@@ -5937,6 +5962,20 @@
             ["-gen-enum-defs"],
             "include/mlir/Dialect/OpenACC/OpenACCOpsEnums.cpp.inc",
         ),
+        (
+            [
+                "-gen-attrdef-decls",
+                "-attrdefs-dialect=acc",
+            ],
+            "include/mlir/Dialect/OpenACC/OpenACCOpsAttributes.h.inc",
+        ),
+        (
+            [
+                "-gen-attrdef-defs",
+                "-attrdefs-dialect=acc",
+            ],
+            "include/mlir/Dialect/OpenACC/OpenACCOpsAttributes.cpp.inc",
+        ),
         (
             ["-gen-op-doc"],
             "g3doc/Dialects/OpenACC/OpenACCOps.md",
@@ -5978,7 +6017,10 @@
     strip_include_prefix = "include",
     tbl_outs = [
         (
-            ["-gen-directive-decl"],
+            [
+                "-gen-directive-decl",
+                "-directives-dialect=OpenMP",
+            ],
             "include/mlir/Dialect/OpenMP/OmpCommon.td",
         ),
     ],
@@ -6045,6 +6087,20 @@
             ["-gen-type-interface-defs"],
             "include/mlir/Dialect/OpenMP/OpenMPTypeInterfaces.cpp.inc",
         ),
+        (
+            [
+                "-gen-attrdef-decls",
+                "-attrdefs-dialect=omp",
+            ],
+            "include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.h.inc",
+        ),
+        (
+            [
+                "-gen-attrdef-defs",
+                "-attrdefs-dialect=omp",
+            ],
+            "include/mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc",
+        ),
         (
             ["-gen-op-doc"],
             "g3doc/Dialects/OpenMP/OpenMPOps.md",