diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 83c781a19b18..85870010f0e2 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1,1716 +1,1779 @@ //===- Ops.td - Standard operation definitions -------------*- tablegen -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Defines some MLIR standard operations. // //===----------------------------------------------------------------------===// #ifndef STANDARD_OPS #define STANDARD_OPS include "mlir/Analysis/CallInterfaces.td" include "mlir/IR/OpAsmInterface.td" def Std_Dialect : Dialect { let name = "std"; let cppNamespace = ""; } // Base class for Standard dialect ops. class Std_Op traits = []> : Op { // For every standard op, there needs to be a: // * void print(OpAsmPrinter &p, ${C++ class of Op} op) // * LogicalResult verify(${C++ class of Op} op) // * ParseResult parse${C++ class of Op}(OpAsmParser &parser, // OperationState &result) // functions. let printer = [{ return ::print(p, *this); }]; let verifier = [{ return ::verify(*this); }]; let parser = [{ return ::parse$cppClass(parser, result); }]; } // Base class for standard cast operations. Requires single operand and result, // but does not constrain them to specific types. 
class CastOp traits = []> : Std_Op { let results = (outs AnyType); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value source, Type destType", [{ impl::buildCastOp(builder, result, source, destType); }]>]; let parser = [{ return impl::parseCastOp(parser, result); }]; let printer = [{ return printStandardCastOp(this->getOperation(), p); }]; let verifier = [{ return ::verifyCastOp(*this); }]; let hasFolder = 1; } // Base class for unary ops. Requires single operand and result. Individual // classes will have `operand` accessor. class UnaryOp traits = []> : Op { let results = (outs AnyType); let printer = [{ return printStandardUnaryOp(this->getOperation(), p); }]; } class UnaryOpSameOperandAndResultType traits = []> : UnaryOp { let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; } class FloatUnaryOp traits = []> : UnaryOpSameOperandAndResultType, Arguments<(ins FloatLike:$operand)>; // Base class for standard arithmetic operations. Requires operands and // results to be of the same type, but does not constrain them to specific // types. Individual classes will have `lhs` and `rhs` accessor to operands. class ArithmeticOp traits = []> : Op { let results = (outs AnyType); let parser = [{ return impl::parseOneResultSameOperandTypeOp(parser, result); }]; let printer = [{ return printStandardBinaryOp(this->getOperation(), p); }]; } // Base class for standard arithmetic operations on integers, vectors and // tensors thereof. This operation takes two operands and returns one result, // each of these is required to be of the same type. This type may be an // integer scalar type, a vector whose element type is an integer type, or an // integer tensor. 
The custom assembly form of the operation is as follows // // i %0, %1 : i32 class IntArithmeticOp traits = []> : ArithmeticOp, Arguments<(ins SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs)>; // Base class for standard arithmetic binary operations on floats, vectors and // tensors thereof. This operation has two operands and returns one result, // each of these is required to be of the same type. This type may be a // floating point scalar type, a vector whose element type is a floating point // type, or a floating point tensor. The custom assembly form of the operation // is as follows // // f %0, %1 : f32 class FloatArithmeticOp traits = []> : ArithmeticOp, Arguments<(ins FloatLike:$lhs, FloatLike:$rhs)>; def AbsFOp : FloatUnaryOp<"absf"> { let summary = "floating point absolute-value operation"; let description = [{ The `absf` operation computes the absolute value. It takes one operand and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. }]; } def AddFOp : FloatArithmeticOp<"addf"> { let summary = "floating point addition operation"; let hasFolder = 1; } def AddIOp : IntArithmeticOp<"addi", [Commutative]> { let summary = "integer addition operation"; let hasFolder = 1; } def AllocOp : Std_Op<"alloc"> { let summary = "memory allocation operation"; let description = [{ The "alloc" operation allocates a region of memory, as specified by its memref type. For example: %0 = alloc() : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> The optional list of dimension operands are bound to the dynamic dimensions specified in its memref type. In the example below, the ssa value '%d' is bound to the second dimension of the memref (which is dynamic). %0 = alloc(%d) : memref<8x?xf32, (d0, d1) -> (d0, d1), 1> The optional list of symbol operands are bound to the symbols of the memref's affine map.
In the example below, the ssa value '%s' is bound to the symbol 's0' in the affine map specified in the alloc's memref type. %0 = alloc()[%s] : memref<8x64xf32, (d0, d1)[s0] -> ((d0 + s0), d1), 1> This operation returns a single ssa value of memref type, which can be used by subsequent load and store operations. The optional `alignment` attribute may be specified to ensure that the region of memory that will be indexed is aligned at the specified byte boundary. TODO(b/144281289) optional alignment attribute to MemRefType. %0 = alloc()[%s] {alignment = 8} : memref<8x64xf32, (d0, d1)[s0] -> ((d0 + s0), d1), 1> }]; let arguments = (ins Variadic:$value, Confined, [IntMinValue<0>]>:$alignment); let results = (outs AnyMemRef); let builders = [OpBuilder< "Builder *builder, OperationState &result, MemRefType memrefType", [{ result.types.push_back(memrefType); }]>, OpBuilder< "Builder *builder, OperationState &result, MemRefType memrefType, " # "ArrayRef operands, IntegerAttr alignment = IntegerAttr()", [{ result.addOperands(operands); result.types.push_back(memrefType); if (alignment) result.addAttribute(getAlignmentAttrName(), alignment); }]>]; let extraClassDeclaration = [{ static StringRef getAlignmentAttrName() { return "alignment"; } MemRefType getType() { return getResult().getType().cast(); } /// Returns the number of symbolic operands (the ones in square brackets), /// which bind to the symbols of the memref's layout map. unsigned getNumSymbolicOperands() { return getNumOperands() - getType().getNumDynamicDims(); } /// Returns the symbolic operands (the ones in square brackets), which bind /// to the symbols of the memref's layout map. operand_range getSymbolicOperands() { return {operand_begin() + getType().getNumDynamicDims(), operand_end()}; } /// Returns the dynamic sizes for this alloc operation if specified.
operand_range getDynamicSizes() { return getOperands(); } }]; let hasCanonicalizer = 1; } def AndOp : IntArithmeticOp<"and", [Commutative]> { let summary = "integer binary and"; let hasFolder = 1; } +def ATOMIC_RMW_KIND_ADDF : I64EnumAttrCase<"addf", 0>; +def ATOMIC_RMW_KIND_ADDI : I64EnumAttrCase<"addi", 1>; +def ATOMIC_RMW_KIND_ASSIGN : I64EnumAttrCase<"assign", 2>; +def ATOMIC_RMW_KIND_MAXF : I64EnumAttrCase<"maxf", 3>; +def ATOMIC_RMW_KIND_MAXS : I64EnumAttrCase<"maxs", 4>; +def ATOMIC_RMW_KIND_MAXU : I64EnumAttrCase<"maxu", 5>; +def ATOMIC_RMW_KIND_MINF : I64EnumAttrCase<"minf", 6>; +def ATOMIC_RMW_KIND_MINS : I64EnumAttrCase<"mins", 7>; +def ATOMIC_RMW_KIND_MINU : I64EnumAttrCase<"minu", 8>; +def ATOMIC_RMW_KIND_MULF : I64EnumAttrCase<"mulf", 9>; +def ATOMIC_RMW_KIND_MULI : I64EnumAttrCase<"muli", 10>; + +def AtomicRMWKindAttr : I64EnumAttr< + "AtomicRMWKind", "", + [ATOMIC_RMW_KIND_ADDF, ATOMIC_RMW_KIND_ADDI, ATOMIC_RMW_KIND_ASSIGN, + ATOMIC_RMW_KIND_MAXF, ATOMIC_RMW_KIND_MAXS, ATOMIC_RMW_KIND_MAXU, + ATOMIC_RMW_KIND_MINF, ATOMIC_RMW_KIND_MINS, ATOMIC_RMW_KIND_MINU, + ATOMIC_RMW_KIND_MULF, ATOMIC_RMW_KIND_MULI]> { + let cppNamespace = "::mlir"; +} + +def AtomicRMWOp : Std_Op<"atomic_rmw", [ + AllTypesMatch<["value", "result"]>, + TypesMatchWith<"value type matches element type of memref", + "memref", "value", + "$_self.cast().getElementType()"> + ]> { + let summary = "atomic read-modify-write operation"; + let description = [{ + The "atomic_rmw" operation provides a way to perform a read-modify-write + sequence that is free from data races. The kind enumeration specifies the + modification to perform. The value operand represents the new value to be + applied during the modification. The memref operand represents the buffer + that the read and write will be performed against, as accessed by the + specified indices. The arity of the indices is the rank of the memref. The + result represents the latest value that was stored. 
+ + Example: + + ```mlir + %x = atomic_rmw "addf" %value, %I[%i] : (f32, memref<10xf32>) -> f32 + ``` + }]; + + let arguments = (ins + AtomicRMWKindAttr:$kind, + AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$value, + MemRefOf<[AnySignlessInteger, AnyFloat]>:$memref, + Variadic:$indices); + let results = (outs AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$result); + + let assemblyFormat = [{ + $kind $value `,` $memref `[` $indices `]` attr-dict `:` `(` type($value) `,` + type($memref) `)` `->` type($result) + }]; + + let extraClassDeclaration = [{ + MemRefType getMemRefType() { + return memref().getType().cast(); + } + }]; +} + def BranchOp : Std_Op<"br", [Terminator]> { let summary = "branch operation"; let description = [{ The "br" operation represents a branch operation in a function. The operation takes variable number of operands and produces no results. The operand number and types for each successor must match the arguments of the block successor. For example: ^bb2: %2 = call @someFn() br ^bb3(%2 : tensor<*xf32>) ^bb3(%3: tensor<*xf32>): }]; let successors = (successor AnySuccessor:$dest); let builders = [OpBuilder<"Builder *, OperationState &result, Block *dest", [{ result.addSuccessor(dest, llvm::None); }]>]; // BranchOp is fully verified by traits. let verifier = ?; let extraClassDeclaration = [{ Block *getDest(); void setDest(Block *block); /// Erase the operand at 'index' from the operand list. void eraseOperand(unsigned index); }]; let hasCanonicalizer = 1; let assemblyFormat = "$dest attr-dict"; } def CallOp : Std_Op<"call", [CallOpInterface]> { let summary = "call operation"; let description = [{ The "call" operation represents a direct call to a function that is within the same symbol scope as the call. The operands and result types of the call must match the specified function type. The callee is encoded as a function attribute named "callee". 
%2 = call @my_add(%0, %1) : (f32, f32) -> f32 }]; let arguments = (ins FlatSymbolRefAttr:$callee, Variadic:$operands); let results = (outs Variadic); let builders = [OpBuilder< "Builder *builder, OperationState &result, FuncOp callee," "ValueRange operands = {}", [{ result.addOperands(operands); result.addAttribute("callee", builder->getSymbolRefAttr(callee)); result.addTypes(callee.getType().getResults()); }]>, OpBuilder< "Builder *builder, OperationState &result, SymbolRefAttr callee," "ArrayRef results, ValueRange operands = {}", [{ result.addOperands(operands); result.addAttribute("callee", callee); result.addTypes(results); }]>, OpBuilder< "Builder *builder, OperationState &result, StringRef callee," "ArrayRef results, ValueRange operands = {}", [{ build(builder, result, builder->getSymbolRefAttr(callee), results, operands); }]>]; let extraClassDeclaration = [{ StringRef getCallee() { return callee(); } FunctionType getCalleeType(); /// Get the argument operands to the called function. operand_range getArgOperands() { return {arg_operand_begin(), arg_operand_end()}; } operand_iterator arg_operand_begin() { return operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } /// Return the callee of this operation. CallInterfaceCallable getCallableForCallee() { return getAttrOfType("callee"); } }]; let assemblyFormat = [{ $callee `(` $operands `)` attr-dict `:` functional-type($operands, results) }]; } def CallIndirectOp : Std_Op<"call_indirect", [ CallOpInterface, TypesMatchWith<"callee input types match argument types", "callee", "operands", "$_self.cast().getInputs()">, TypesMatchWith<"callee result types match result types", "callee", "results", "$_self.cast().getResults()"> ]> { let summary = "indirect call operation"; let description = [{ The "call_indirect" operation represents an indirect call to a value of function type. Functions are first class types in MLIR, and may be passed as arguments and merged together with block arguments. 
The operands and result types of the call must match the specified function type. %3 = call_indirect %2(%0, %1) : (f32, f32) -> f32 }]; let arguments = (ins FunctionType:$callee, Variadic:$operands); let results = (outs Variadic:$results); let builders = [OpBuilder< "Builder *, OperationState &result, Value callee," "ValueRange operands = {}", [{ result.operands.push_back(callee); result.addOperands(operands); result.addTypes(callee.getType().cast().getResults()); }]>]; let extraClassDeclaration = [{ Value getCallee() { return getOperand(0); } /// Get the argument operands to the called function. operand_range getArgOperands() { return {arg_operand_begin(), arg_operand_end()}; } operand_iterator arg_operand_begin() { return ++operand_begin(); } operand_iterator arg_operand_end() { return operand_end(); } /// Return the callee of this operation. CallInterfaceCallable getCallableForCallee() { return getCallee(); } }]; let verifier = ?; let hasCanonicalizer = 1; let assemblyFormat = "$callee `(` $operands `)` attr-dict `:` type($callee)"; } def CeilFOp : FloatUnaryOp<"ceilf"> { let summary = "ceiling of the specified value"; let description = [{ The `ceilf` operation computes the ceiling of a given value. It takes one operand and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. }]; } def CmpFOp : Std_Op<"cmpf", [NoSideEffect, SameTypeOperands, SameOperandsAndResultShape, TypesMatchWith< "result type has i1 element type and same shape as operands", "lhs", "result", "getI1SameShape($_self)">]> { let summary = "floating-point comparison operation"; let description = [{ The "cmpf" operation compares its two operands according to the float comparison rules and the predicate specified by the respective attribute. 
The predicate defines the type of comparison: (un)orderedness, (in)equality and signed less/greater than (or equal to) as well as predicates that are always true or false. The operands must have the same type, and this type must be a float type, or a vector or tensor thereof. The result is an i1, or a vector/tensor thereof having the same shape as the inputs. Unlike cmpi, the operands are always treated as signed. The u prefix indicates *unordered* comparison, not unsigned comparison, so "une" means unordered or not equal. For the sake of readability by humans, custom assembly form for the operation uses a string-typed attribute for the predicate. The value of this attribute corresponds to lower-cased name of the predicate constant, e.g., "one" means "ordered not equal". The string representation of the attribute is merely a syntactic sugar and is converted to an integer attribute by the parser. %r1 = cmpf "oeq" %0, %1 : f32 %r2 = cmpf "ult" %0, %1 : tensor<42x42xf64> %r3 = "std.cmpf"(%0, %1) {predicate: 0} : (f8, f8) -> i1 }]; let arguments = (ins FloatLike:$lhs, FloatLike:$rhs); let results = (outs BoolLike:$result); let builders = [OpBuilder< "Builder *builder, OperationState &result, CmpFPredicate predicate," "Value lhs, Value rhs", [{ ::buildCmpFOp(builder, result, predicate, lhs, rhs); }]>]; let extraClassDeclaration = [{ static StringRef getPredicateAttrName() { return "predicate"; } static CmpFPredicate getPredicateByName(StringRef name); CmpFPredicate getPredicate() { return (CmpFPredicate)getAttrOfType(getPredicateAttrName()) .getInt(); } }]; let hasFolder = 1; } def CMPI_P_EQ : I64EnumAttrCase<"eq", 0>; def CMPI_P_NE : I64EnumAttrCase<"ne", 1>; def CMPI_P_SLT : I64EnumAttrCase<"slt", 2>; def CMPI_P_SLE : I64EnumAttrCase<"sle", 3>; def CMPI_P_SGT : I64EnumAttrCase<"sgt", 4>; def CMPI_P_SGE : I64EnumAttrCase<"sge", 5>; def CMPI_P_ULT : I64EnumAttrCase<"ult", 6>; def CMPI_P_ULE : I64EnumAttrCase<"ule", 7>; def CMPI_P_UGT : I64EnumAttrCase<"ugt", 8>; def 
CMPI_P_UGE : I64EnumAttrCase<"uge", 9>; def CmpIPredicateAttr : I64EnumAttr< "CmpIPredicate", "", [CMPI_P_EQ, CMPI_P_NE, CMPI_P_SLT, CMPI_P_SLE, CMPI_P_SGT, CMPI_P_SGE, CMPI_P_ULT, CMPI_P_ULE, CMPI_P_UGT, CMPI_P_UGE]> { let cppNamespace = "::mlir"; } def CmpIOp : Std_Op<"cmpi", [NoSideEffect, SameTypeOperands, SameOperandsAndResultShape, TypesMatchWith< "result type has i1 element type and same shape as operands", "lhs", "result", "getI1SameShape($_self)">]> { let summary = "integer comparison operation"; let description = [{ The "cmpi" operation compares its two operands according to the integer comparison rules and the predicate specified by the respective attribute. The predicate defines the type of comparison: (in)equality, (un)signed less/greater than (or equal to). The operands must have the same type, and this type must be an integer type, a vector or a tensor thereof. The result is an i1, or a vector/tensor thereof having the same shape as the inputs. Since integers are signless, the predicate also explicitly indicates whether to interpret the operands as signed or unsigned integers for less/greater than comparisons. For the sake of readability by humans, custom assembly form for the operation uses a string-typed attribute for the predicate. The value of this attribute corresponds to lower-cased name of the predicate constant, e.g., "slt" means "signed less than". The string representation of the attribute is merely a syntactic sugar and is converted to an integer attribute by the parser. 
%r1 = cmpi "eq", %0, %1 : i32 %r2 = cmpi "slt", %0, %1 : tensor<42x42xi64> %r3 = "std.cmpi"(%0, %1){predicate = 0 : i64} : (i8, i8) -> i1 }]; let arguments = (ins CmpIPredicateAttr:$predicate, SignlessIntegerLike:$lhs, SignlessIntegerLike:$rhs ); let results = (outs BoolLike:$result); let builders = [OpBuilder< "Builder *builder, OperationState &result, CmpIPredicate predicate," "Value lhs, Value rhs", [{ ::buildCmpIOp(builder, result, predicate, lhs, rhs); }]>]; let extraClassDeclaration = [{ static StringRef getPredicateAttrName() { return "predicate"; } static CmpIPredicate getPredicateByName(StringRef name); CmpIPredicate getPredicate() { return (CmpIPredicate)getAttrOfType(getPredicateAttrName()) .getInt(); } }]; let verifier = [{ return success(); }]; let hasFolder = 1; let assemblyFormat = "$predicate `,` $lhs `,` $rhs attr-dict `:` type($lhs)"; } def CondBranchOp : Std_Op<"cond_br", [Terminator]> { let summary = "conditional branch operation"; let description = [{ The "cond_br" operation represents a conditional branch operation in a function. The operation takes variable number of operands and produces no results. The operand number and types for each successor must match the arguments of the block successor. For example: ^bb0: %0 = extract_element %arg0[] : tensor cond_br %0, ^bb1, ^bb2 ^bb1: ... ^bb2: ... }]; let arguments = (ins I1:$condition); let successors = (successor AnySuccessor:$trueDest, AnySuccessor:$falseDest); // CondBranchOp is fully verified by traits. let verifier = ?; let extraClassDeclaration = [{ // These are the indices into the dests list. enum { trueIndex = 0, falseIndex = 1 }; // The condition operand is the first operand in the list. Value getCondition() { return getOperand(0); } /// Return the destination if the condition is true. Block *getTrueDest() { return getSuccessor(trueIndex); } /// Return the destination if the condition is false.
Block *getFalseDest() { return getSuccessor(falseIndex); } // Accessors for operands to the 'true' destination. Value getTrueOperand(unsigned idx) { assert(idx < getNumTrueOperands()); return getOperand(getTrueDestOperandIndex() + idx); } void setTrueOperand(unsigned idx, Value value) { assert(idx < getNumTrueOperands()); setOperand(getTrueDestOperandIndex() + idx, value); } operand_iterator true_operand_begin() { return operand_begin() + getTrueDestOperandIndex(); } operand_iterator true_operand_end() { return true_operand_begin() + getNumTrueOperands(); } operand_range getTrueOperands() { return {true_operand_begin(), true_operand_end()}; } unsigned getNumTrueOperands() { return getNumSuccessorOperands(trueIndex); } /// Erase the operand at 'index' from the true operand list. void eraseTrueOperand(unsigned index) { getOperation()->eraseSuccessorOperand(trueIndex, index); } // Accessors for operands to the 'false' destination. Value getFalseOperand(unsigned idx) { assert(idx < getNumFalseOperands()); return getOperand(getFalseDestOperandIndex() + idx); } void setFalseOperand(unsigned idx, Value value) { assert(idx < getNumFalseOperands()); setOperand(getFalseDestOperandIndex() + idx, value); } operand_iterator false_operand_begin() { return true_operand_end(); } operand_iterator false_operand_end() { return false_operand_begin() + getNumFalseOperands(); } operand_range getFalseOperands() { return {false_operand_begin(), false_operand_end()}; } unsigned getNumFalseOperands() { return getNumSuccessorOperands(falseIndex); } /// Erase the operand at 'index' from the false operand list. void eraseFalseOperand(unsigned index) { getOperation()->eraseSuccessorOperand(falseIndex, index); } private: /// Get the index of the first true destination operand. unsigned getTrueDestOperandIndex() { return 1; } /// Get the index of the first false destination operand. 
unsigned getFalseDestOperandIndex() { return getTrueDestOperandIndex() + getNumTrueOperands(); } }]; let hasCanonicalizer = 1; let assemblyFormat = "$condition `,` successors attr-dict"; } def ConstantOp : Std_Op<"constant", [NoSideEffect, DeclareOpInterfaceMethods]> { let summary = "constant"; let arguments = (ins AnyAttr:$value); let results = (outs AnyType); let builders = [OpBuilder< "Builder *builder, OperationState &result, Attribute value", [{ build(builder, result, value.getType(), value); }]>]; let extraClassDeclaration = [{ Attribute getValue() { return getAttr("value"); } /// Returns true if a constant operation can be built with the given value /// and result type. static bool isBuildableWith(Attribute value, Type type); }]; let hasFolder = 1; } def CopySignOp : FloatArithmeticOp<"copysign"> { let summary = "A copysign operation"; let description = [{ The `copysign` returns a value with the magnitude of the first operand and the sign of the second operand. It takes two operands and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. }]; } def CosOp : FloatUnaryOp<"cos"> { let summary = "cosine of the specified value"; let description = [{ The `cos` operation computes the cosine of a given value. It takes one operand and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. }]; } def DeallocOp : Std_Op<"dealloc"> { let summary = "memory deallocation operation"; let description = [{ The "dealloc" operation frees the region of memory referenced by a memref which was originally created by the "alloc" operation. The "dealloc" operation should not be called on memrefs which alias an alloc'd memref (i.e. memrefs returned by the "view" and "reshape" operations). 
%0 = alloc() : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> dealloc %0 : memref<8x64xf32, (d0, d1) -> (d0, d1), 1> }]; let arguments = (ins AnyMemRef:$memref); let hasCanonicalizer = 1; let hasFolder = 1; let assemblyFormat = "$memref attr-dict `:` type($memref)"; } def DimOp : Std_Op<"dim", [NoSideEffect]> { let summary = "dimension index operation"; let description = [{ The "dim" operation takes a memref or tensor operand and returns an "index". It requires a single integer attribute named "index". It returns the size of the specified dimension. For example: %1 = dim %0, 2 : tensor }]; let arguments = (ins AnyTypeOf<[AnyMemRef, AnyTensor], "any tensor or memref type">:$memrefOrTensor, APIntAttr:$index); let results = (outs Index); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value memrefOrTensor," "unsigned index", [{ auto indexType = builder->getIndexType(); auto indexAttr = builder->getIntegerAttr(indexType, index); build(builder, result, indexType, memrefOrTensor, indexAttr); }]>]; let extraClassDeclaration = [{ unsigned getIndex() { return getAttrOfType("index").getValue().getZExtValue(); } }]; let hasFolder = 1; } def DivFOp : FloatArithmeticOp<"divf"> { let summary = "floating point division operation"; } def SignedDivIOp : IntArithmeticOp<"divi_signed"> { let summary = "signed integer division operation"; let hasFolder = 1; } def UnsignedDivIOp : IntArithmeticOp<"divi_unsigned"> { let summary = "unsigned integer division operation"; let hasFolder = 1; } def ExpOp : FloatUnaryOp<"exp"> { let summary = "base-e exponential of the specified value"; } def ExtractElementOp : Std_Op<"extract_element", [NoSideEffect, TypesMatchWith<"result type matches element type of aggregate", "aggregate", "result", "$_self.cast().getElementType()">]> { let summary = "element extract operation"; let description = [{ The "extract_element" op reads a tensor or vector and returns one element from it specified by an index list. 
The output of extract is a new value with the same type as the elements of the tensor or vector. The arity of indices matches the rank of the accessed value (i.e., if a tensor is of rank 3, then 3 indices are required for the extract). The indices should all be of index type. For example: %3 = extract_element %0[%1, %2] : vector<4x4xi32> }]; let arguments = (ins AnyTypeOf<[AnyVector, AnyTensor]>:$aggregate, Variadic:$indices); let results = (outs AnyType:$result); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value aggregate," "ValueRange indices = {}", [{ auto resType = aggregate.getType().cast() .getElementType(); build(builder, result, resType, aggregate, indices); }]>]; let extraClassDeclaration = [{ Value getAggregate() { return getOperand(0); } operand_range getIndices() { return {operand_begin() + 1, operand_end()}; } }]; let hasFolder = 1; let assemblyFormat = [{ $aggregate `[` $indices `]` attr-dict `:` type($aggregate) }]; } def IndexCastOp : CastOp<"index_cast">, Arguments<(ins AnyType:$in)> { let summary = "cast between index and integer types"; let description = [{ Casts between integer scalars and 'index' scalars. Index is an integer of platform-specific bit width. If casting to a wider integer, the value is sign-extended. If casting to a narrower integer, the value is truncated. }]; let extraClassDeclaration = [{ /// Return true if `a` and `b` are valid operand and result pairs for /// the operation. static bool areCastCompatible(Type a, Type b); }]; let hasFolder = 1; } def FPExtOp : CastOp<"fpext">, Arguments<(ins AnyType:$in)> { let summary = "cast from floating-point to wider floating-point"; let description = [{ Cast a floating-point value to a larger floating-point-typed value. The destination type must be strictly wider than the source type. Only scalars are currently supported. }]; let extraClassDeclaration = [{ /// Return true if `a` and `b` are valid operand and result pairs for /// the operation.
static bool areCastCompatible(Type a, Type b); }]; let hasFolder = 0; } def FPTruncOp : CastOp<"fptrunc">, Arguments<(ins AnyType:$in)> { let summary = "cast from floating-point to narrower floating-point"; let description = [{ Truncate a floating-point value to a smaller floating-point-typed value. The destination type must be strictly narrower than the source type. If the value cannot be exactly represented, it is rounded using the default rounding mode. Only scalars are currently supported. }]; let extraClassDeclaration = [{ /// Return true if `a` and `b` are valid operand and result pairs for /// the operation. static bool areCastCompatible(Type a, Type b); }]; let hasFolder = 0; } def LoadOp : Std_Op<"load", [TypesMatchWith<"result type matches element type of 'memref'", "memref", "result", "$_self.cast().getElementType()">]> { let summary = "load operation"; let description = [{ The "load" op reads an element from a memref specified by an index list. The output of load is a new value with the same type as the elements of the memref. The arity of indices is the rank of the memref (i.e., if the memref loaded from is of rank 3, then 3 indices are required for the load following the memref identifier). 
For example: %3 = load %0[%1, %1] : memref<4x4xi32> }]; let arguments = (ins AnyMemRef:$memref, Variadic:$indices); let results = (outs AnyType:$result); let builders = [OpBuilder< "Builder *, OperationState &result, Value memref," "ValueRange indices = {}", [{ auto memrefType = memref.getType().cast(); result.addOperands(memref); result.addOperands(indices); result.types.push_back(memrefType.getElementType()); }]>]; let extraClassDeclaration = [{ Value getMemRef() { return getOperand(0); } void setMemRef(Value value) { setOperand(0, value); } MemRefType getMemRefType() { return getMemRef().getType().cast(); } operand_range getIndices() { return {operand_begin() + 1, operand_end()}; } }]; let hasFolder = 1; let assemblyFormat = "$memref `[` $indices `]` attr-dict `:` type($memref)"; } def LogOp : FloatUnaryOp<"log"> { let summary = "base-e logarithm of the specified value"; } def Log10Op : FloatUnaryOp<"log10"> { let summary = "base-10 logarithm of the specified value"; } def Log2Op : FloatUnaryOp<"log2"> { let summary = "base-2 logarithm of the specified value"; } def MemRefCastOp : CastOp<"memref_cast"> { let summary = "memref cast operation"; let description = [{ The "memref_cast" operation converts a memref from one type to an equivalent type with a compatible shape. The source and destination types are compatible if: a. both are ranked memref types with the same element type, affine mappings, address space, and rank but where the individual dimensions may add or remove constant dimensions from the memref type. If the cast converts any dimensions from an unknown to a known size, then it acts as an assertion that fails at runtime of the dynamic dimensions disagree with resultant destination size. Example: Assert that the input dynamic shape matches the destination static shape. %2 = memref_cast %1 : memref to memref<4x4xf32> Erase static shape information, replacing it with dynamic information. 
%3 = memref_cast %1 : memref<4xf32> to memref The same holds true for offsets and strides. Assert that the input dynamic shape matches the destination static stride. %4 = memref_cast %1 : memref<12x4xf32, offset:?, strides: [?, ?]> to memref<12x4xf32, offset:5, strides: [4, 1]> Erase static offset and stride information, replacing it with dynamic information. %5 = memref_cast %1 : memref<12x4xf32, offset:5, strides: [4, 1]> to memref<12x4xf32, offset:?, strides: [?, ?]> b. either or both memref types are unranked with the same element type, and address space. Example: Cast to concrete shape. %4 = memref_cast %1 : memref<*xf32> to memref<4x?xf32> Erase rank information. %5 = memref_cast %1 : memref<4x?xf32> to memref<*xf32> }]; let arguments = (ins AnyRankedOrUnrankedMemRef:$source); let results = (outs AnyRankedOrUnrankedMemRef); let extraClassDeclaration = [{ /// Return true if `a` and `b` are valid operand and result pairs for /// the operation. static bool areCastCompatible(Type a, Type b); /// The result of a memref_cast is always a memref. Type getType() { return getResult().getType(); } }]; } def MulFOp : FloatArithmeticOp<"mulf"> { let summary = "floating point multiplication operation"; let hasFolder = 1; } def MulIOp : IntArithmeticOp<"muli", [Commutative]> { let summary = "integer multiplication operation"; let hasFolder = 1; } def NegFOp : FloatUnaryOp<"negf"> { let summary = "floating point negation"; let description = [{ The `negf` operation computes the negation of a given value. It takes one operand and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. 
}]; } def OrOp : IntArithmeticOp<"or", [Commutative]> { let summary = "integer binary or"; let hasFolder = 1; } def PrefetchOp : Std_Op<"prefetch"> { let summary = "prefetch operation"; let description = [{ The "prefetch" op prefetches data from a memref location described with subscript indices similar to std.load, and with three attributes: a read/write specifier, a locality hint, and a cache type specifier as shown below: prefetch %0[%i, %j], read, locality<3>, data : memref<400x400xi32> The read/write specifier is either 'read' or 'write', the locality hint ranges from locality<0> (no locality) to locality<3> (extremely local keep in cache). The cache type specifier is either 'data' or 'instr' and specifies whether the prefetch is performed on data cache or on instruction cache. }]; let arguments = (ins AnyMemRef:$memref, Variadic:$indices, BoolAttr:$isWrite, Confined, IntMaxValue<3>]>:$localityHint, BoolAttr:$isDataCache); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value memref," "ArrayRef indices, bool isWrite, unsigned hint, bool isData", [{ auto hintAttr = builder->getI32IntegerAttr(hint); auto isWriteAttr = builder->getBoolAttr(isWrite); auto isDataCacheAttr = builder->getBoolAttr(isData); result.addOperands(memref); result.addOperands(indices); result.addAttribute("localityHint", hintAttr); result.addAttribute("isWrite", isWriteAttr); result.addAttribute("isDataCache", isDataCacheAttr); }]>]; let extraClassDeclaration = [{ MemRefType getMemRefType() { return memref().getType().cast(); } static StringRef getLocalityHintAttrName() { return "localityHint"; } static StringRef getIsWriteAttrName() { return "isWrite"; } static StringRef getIsDataCacheAttrName() { return "isDataCache"; } }]; let hasFolder = 1; } def RankOp : Std_Op<"rank", [NoSideEffect]> { let summary = "rank operation"; let description = [{ The "rank" operation takes a tensor operand and returns its rank. 
%1 = rank %0 : index }]; let arguments = (ins AnyTensor); let results = (outs Index); let verifier = ?; let builders = [OpBuilder< "Builder *builder, OperationState &result, Value tensor", [{ auto indexType = builder->getIndexType(); build(builder, result, indexType, tensor); }]>]; let hasFolder = 1; let assemblyFormat = "operands attr-dict `:` type(operands)"; } def RemFOp : FloatArithmeticOp<"remf"> { let summary = "floating point division remainder operation"; } def SignedRemIOp : IntArithmeticOp<"remi_signed"> { let summary = "signed integer division remainder operation"; let hasFolder = 1; } def UnsignedRemIOp : IntArithmeticOp<"remi_unsigned"> { let summary = "unsigned integer division remainder operation"; let hasFolder = 1; } def ReturnOp : Std_Op<"return", [Terminator, HasParent<"FuncOp">]> { let summary = "return operation"; let description = [{ The "return" operation represents a return operation within a function. The operation takes variable number of operands and produces no results. The operand number and types must match the signature of the function that contains the operation. For example: func @foo() : (i32, f8) { ... return %0, %1 : i32, f8 }]; let arguments = (ins Variadic:$operands); let builders = [OpBuilder< "Builder *b, OperationState &result", [{ build(b, result, llvm::None); }] >]; let assemblyFormat = "attr-dict ($operands^ `:` type($operands))?"; } def SelectOp : Std_Op<"select", [NoSideEffect, SameOperandsAndResultShape, AllTypesMatch<["true_value", "false_value", "result"]>, TypesMatchWith<"condition type matches i1 equivalent of result type", "result", "condition", "getI1SameShape($_self)">]> { let summary = "select operation"; let description = [{ The "select" operation chooses one value based on a binary condition supplied as its first operand. If the value of the first operand is 1, the second operand is chosen, otherwise the third operand is chosen. The second and the third operand must have the same type. 
The operation applies elementwise to vectors and tensors. The shape of all arguments must be identical. For example, the maximum operation is obtained by combining "select" with "cmpi" as follows. %2 = cmpi "gt" %0, %1 : i32 // %2 is i1 %3 = select %2, %0, %1 : i32 }]; let arguments = (ins BoolLike:$condition, SignlessIntegerOrFloatLike:$true_value, SignlessIntegerOrFloatLike:$false_value); let results = (outs SignlessIntegerOrFloatLike:$result); let verifier = ?; let builders = [OpBuilder< "Builder *builder, OperationState &result, Value condition," "Value trueValue, Value falseValue", [{ result.addOperands({condition, trueValue, falseValue}); result.addTypes(trueValue.getType()); }]>]; let extraClassDeclaration = [{ Value getCondition() { return condition(); } Value getTrueValue() { return true_value(); } Value getFalseValue() { return false_value(); } }]; let hasFolder = 1; let assemblyFormat = [{ $condition `,` $true_value `,` $false_value attr-dict `:` type($result) }]; } def SignExtendIOp : Std_Op<"sexti", [NoSideEffect, SameOperandsAndResultShape]> { let summary = "integer sign extension operation"; let description = [{ The integer sign extension operation takes an integer input of width M and an integer destination type of width N. The destination bit-width must be larger than the input bit-width (N > M). The top-most (N - M) bits of the output are filled with copies of the most-significant bit of the input. 
%1 = constant 5 : i3 // %1 is 0b101 %2 = sexti %1 : i3 to i6 // %2 is 0b111101 %3 = constant 2 : i3 // %3 is 0b010 %4 = sexti %3 : i3 to i6 // %4 is 0b000010 %5 = sexti %0 : vector<2 x i32> to vector<2 x i64> }]; let arguments = (ins SignlessIntegerLike:$value); let results = (outs SignlessIntegerLike); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value value, Type destType", [{ result.addOperands(value); result.addTypes(destType); }]>]; let parser = [{ return impl::parseCastOp(parser, result); }]; let printer = [{ return printStandardCastOp(this->getOperation(), p); }]; } def ShiftLeftOp : IntArithmeticOp<"shift_left"> { let summary = "integer left-shift"; let description = [{ The shift_left operation shifts an integer value to the left by a variable amount. The low order bits are filled with zeros. %1 = constant 5 : i8 // %1 is 0b00000101 %2 = constant 3 : i8 %3 = shift_left %1, %2 : (i8, i8) -> i8 // %3 is 0b00101000 }]; } def SignedShiftRightOp : IntArithmeticOp<"shift_right_signed"> { let summary = "signed integer right-shift"; let description = [{ The shift_right_signed operation shifts an integer value to the right by a variable amount. The integer is interpreted as signed. The high order bits in the output are filled with copies of the most-significant bit of the shifted value (which means that the sign of the value is preserved). %1 = constant 160 : i8 // %1 is 0b10100000 %2 = constant 3 : i8 %3 = shift_right_signed %1, %2 : (i8, i8) -> i8 // %3 is 0b11110100 %4 = constant 96 : i8 // %4 is 0b01100000 %5 = shift_right_signed %4, %2 : (i8, i8) -> i8 // %5 is 0b00001100 }]; } def UnsignedShiftRightOp : IntArithmeticOp<"shift_right_unsigned"> { let summary = "unsigned integer right-shift"; let description = [{ The shift_right_unsigned operation shifts an integer value to the right by a variable amount. The integer is interpreted as unsigned. The high order bits are always filled with zeros. 
%1 = constant 160 : i8 // %1 is 0b10100000 %2 = constant 3 : i8 %3 = shift_right_unsigned %1, %2 : (i8, i8) -> i8 // %3 is 0b00010100 }]; } def SIToFPOp : CastOp<"sitofp">, Arguments<(ins AnyType:$in)> { let summary = "cast from integer type to floating-point"; let description = [{ Cast from a value interpreted as signed integer to the corresponding floating-point value. If the value cannot be exactly represented, it is rounded using the default rounding mode. Only scalars are currently supported. }]; let extraClassDeclaration = [{ /// Return true if `a` and `b` are valid operand and result pairs for /// the operation. static bool areCastCompatible(Type a, Type b); }]; let hasFolder = 0; } def SplatOp : Std_Op<"splat", [NoSideEffect, TypesMatchWith<"operand type matches element type of result", "aggregate", "input", "$_self.cast().getElementType()">]> { let summary = "splat or broadcast operation"; let description = [{ The "splat" op reads a value of integer or float type and broadcasts it into a vector or a tensor. The output of splat is thus a new value of either vector or tensor type with elemental type being its operand's type. When the result is a tensor, it has to be statically shaped. %1 = splat %0 : vector<8xi32> %2 = splat %0 : tensor<4x8xi32> TODO: Extend this operation to broadcast to dynamically shaped tensors in the same way dynamically shaped memrefs are handled. // Broadcasts %s to a 2-d dynamically shaped tensor, with %m, %n binding // to the sizes of the two dynamic dimensions. 
%m = "foo"() : () -> (index) %n = "bar"() : () -> (index) %t = splat %s [%m, %n] : tensor }]; let arguments = (ins AnyTypeOf<[AnySignlessInteger, AnyFloat], "integer or float type">:$input); let results = (outs AnyTypeOf<[AnyVector, AnyStaticShapeTensor]>:$aggregate); let builders = [OpBuilder<"Builder *builder, OperationState &result, Value element, " "Type aggregateType", [{ build(builder, result, aggregateType, element); }]>]; let hasFolder = 1; let assemblyFormat = "$input attr-dict `:` type($aggregate)"; } def StoreOp : Std_Op<"store", [TypesMatchWith<"type of 'value' matches element type of 'memref'", "memref", "value", "$_self.cast().getElementType()">]> { let summary = "store operation"; let description = [{ The "store" op writes an element to a memref specified by an index list. The arity of indices is the rank of the memref (i.e. if the memref being stored to is of rank 3, then 3 indices are required for the store following the memref identifier). The store operation does not produce a result. 
In the following example, the ssa value '%v' is stored in memref '%A' at indices [%i, %j]: store %v, %A[%i, %j] : memref<4x128xf32, (d0, d1) -> (d0, d1), 0> }]; let arguments = (ins AnyType:$value, AnyMemRef:$memref, Variadic:$indices); let builders = [OpBuilder< "Builder *, OperationState &result, Value valueToStore, Value memref", [{ result.addOperands(valueToStore); result.addOperands(memref); }]>]; let extraClassDeclaration = [{ Value getValueToStore() { return getOperand(0); } Value getMemRef() { return getOperand(1); } void setMemRef(Value value) { setOperand(1, value); } MemRefType getMemRefType() { return getMemRef().getType().cast(); } operand_range getIndices() { return {operand_begin() + 2, operand_end()}; } }]; let hasFolder = 1; let assemblyFormat = [{ $value `,` $memref `[` $indices `]` attr-dict `:` type($memref) }]; } def SubFOp : FloatArithmeticOp<"subf"> { let summary = "floating point subtraction operation"; let hasFolder = 1; } def SubIOp : IntArithmeticOp<"subi"> { let summary = "integer subtraction operation"; let hasFolder = 1; } def SubViewOp : Std_Op<"subview", [AttrSizedOperandSegments, NoSideEffect]> { let summary = "memref subview operation"; let description = [{ The "subview" operation converts a memref type to another memref type which represents a reduced-size view of the original memref as specified by the operation's offsets, sizes and strides arguments. The SubView operation supports the following arguments: *) Memref: the "base" memref on which to create a "view" memref. *) Offsets: zero or memref-rank number of dynamic offsets into the "base" memref at which to create the "view" memref. *) Sizes: zero or memref-rank dynamic size operands which specify the dynamic sizes of the result "view" memref type. *) Strides: zero or memref-rank number of dynamic strides which are applied multiplicatively to the base memref strides in each dimension. 
Note on the number of operands for offsets, sizes and strides: For each of these, the number of operands must either be same as the memref-rank number or empty. For the latter, those values will be treated as constants. Example 1: %0 = alloc() : memref<64x4xf32, (d0, d1) -> (d0 * 4 + d1)> // Create a sub-view of "base" memref '%0' with offset arguments '%c0', // dynamic sizes for each dimension, and stride arguments '%c1'. %1 = subview %0[%c0, %c0][%size0, %size1][%c1, %c1] : memref<64x4xf32, (d0, d1) -> (d0 * 4 + d1) > to memref (d0 * s1 + d1 + s0)> Example 2: %0 = alloc() : memref<8x16x4xf32, (d0, d1, d1) -> (d0 * 64 + d1 * 4 + d2)> // Create a sub-view of "base" memref '%0' with dynamic offsets, sizes, // and strides. // Note that dynamic offsets are represented by the linearized dynamic // offset symbol 's0' in the subview memref layout map, and that the // dynamic strides operands, after being applied to the base memref // strides in each dimension, are represented in the view memref layout // map as symbols 's1', 's2' and 's3'. %1 = subview %0[%i, %j, %k][%size0, %size1, %size2][%x, %y, %z] : memref<8x16x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> to memref (d0 * s1 + d1 * s2 + d2 * s3 + s0)> Example 3: %0 = alloc() : memref<8x16x4xf32, (d0, d1, d1) -> (d0 * 64 + d1 * 4 + d2)> // Subview with constant offsets, sizes and strides. %1 = subview %0[][][] : memref<8x16x4xf32, (d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> to memref<4x4x4xf32, (d0, d1, d2) -> (d0 * 16 + d1 * 4 + d2 + 8)> Example 4: %0 = alloc(%arg0, %arg1) : memref // Subview with constant size, but dynamic offsets and // strides. The resulting memref has a static shape, but if the // base memref has an affine map to describe the layout, the result // memref also uses an affine map to describe the layout. The // strides of the result memref is computed as follows: // // Let #map1 represents the layout of the base memref, and #map2 // represents the layout of the result memref. 
A #mapsubview can be // constructed to map an index from the result memref to the base // memref (note that the description below uses more convenient // naming for symbols, while in affine maps, symbols are // represented as unsigned numbers that identify that symbol in the // given affine map. // // #mapsubview = (d0, d1)[o0, o1, t0, t1] -> (d0 * t0 + o0, d1 * t1 + o1) // // where, o0, o1, ... are offsets, and t0, t1, ... are strides. Then, // // #map2 = #map1.compose(#mapsubview) // // If the layout map is represented as // // #map1 = (d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0) // // then, // // #map2 = (d0, d1)[s0, s1, s2, o0, o1, t0, t1] -> // (d0 * s1 * t0 + d1 * s2 * t1 + o0 * s1 + o1 * s2 + s0) // // Representing this canonically // // #map2 = (d0, d1)[r0, r1, r2] -> (d0 * r1 + d1 * r2 + r0) // // where, r0 = o0 * s1 + o1 * s2 + s0, r1 = s1 * t0, r2 = s2 * t1. %1 = subview %0[%i, %j][][%x, %y] : : memref (d0 * s1 + d1 * s2 + s0)> to memref<4x4xf32, (d0, d1)[r0, r1, r2] -> (d0 * r1 + d1 * r2 + r0)> // Note that the subview op does not guarantee that the result // memref is "inbounds" w.r.t to base memref. It is upto the client // to ensure that the subview is accessed in a manner that is // in-bounds. } }]; // TODO(b/144779634, ravishankarm) : Use different arguments for // offsets, sizes and strides. let arguments = (ins AnyMemRef:$source, Variadic:$offsets, Variadic:$sizes, Variadic:$strides, I32ElementsAttr:$operand_segment_sizes ); let results = (outs AnyMemRef); let builders = [ OpBuilder< "Builder *b, OperationState &result, Value source, " "ValueRange offsets, ValueRange sizes, " "ValueRange strides, Type resultType = Type(), " "ArrayRef attrs = {}">, OpBuilder< "Builder *builder, OperationState &result, " "Type resultType, Value source"> ]; let extraClassDeclaration = [{ /// Returns the type of the base memref operand. MemRefType getBaseMemRefType() { return source().getType().cast(); } /// The result of a subview is always a memref. 
MemRefType getType() { return getResult().getType().cast(); } /// Returns as integer value the number of offset operands. int64_t getNumOffsets() { return llvm::size(offsets()); } /// Returns as integer value the number of size operands. int64_t getNumSizes() { return llvm::size(sizes()); } /// Returns as integer value the number of stride operands. int64_t getNumStrides() { return llvm::size(strides()); } /// Returns the dynamic sizes for this subview operation if specified. operand_range getDynamicSizes() { return sizes(); } /// Returns in `staticStrides` the static value of the stride /// operands. Returns failure() if the static value of the stride /// operands could not be retrieved. LogicalResult getStaticStrides(SmallVectorImpl &staticStrides); // Auxiliary range data structure and helper function that unpacks the // offset, size and stride operands of the SubViewOp into a list of triples. // Such a list of triple is sometimes more convenient to manipulate. struct Range { Value offset, size, stride; }; SmallVector getRanges(); }]; let hasCanonicalizer = 1; } def SqrtOp : FloatUnaryOp<"sqrt"> { let summary = "sqrt of the specified value"; let description = [{ The `sqrt` operation computes the square root. It takes one operand and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. }]; } def TanhOp : FloatUnaryOp<"tanh"> { let summary = "hyperbolic tangent of the specified value"; let description = [{ The `tanh` operation computes the hyperbolic tangent. It takes one operand and returns one result of the same type. This type may be a float scalar type, a vector whose element type is float, or a tensor of floats. It has no standard attributes. 
}]; } def TensorCastOp : CastOp<"tensor_cast"> { let summary = "tensor cast operation"; let description = [{ The "tensor_cast" operation converts a tensor from one type to an equivalent type without changing any data elements. The source and destination types must both be tensor types with the same element type. If both are ranked then the rank should be the same and static dimensions should match. The operation is invalid if converting to a mismatching constant dimension. Convert from unknown rank to rank 2 with unknown dimension sizes. %2 = tensor_cast %1 : tensor<*xf32> to tensor }]; let arguments = (ins AnyTensor); let results = (outs AnyTensor); let extraClassDeclaration = [{ /// Return true if `a` and `b` are valid operand and result pairs for /// the operation. static bool areCastCompatible(Type a, Type b); /// The result of a tensor_cast is always a tensor. TensorType getType() { return getResult().getType().cast(); } }]; } def TensorLoadOp : Std_Op<"tensor_load", [SameOperandsAndResultShape, SameOperandsAndResultElementType, TypesMatchWith<"result type matches tensor equivalent of 'memref'", "memref", "result", "getTensorTypeFromMemRefType($_self)">]> { let summary = "tensor load operation"; let description = [{ The "tensor_load" operation creates a tensor from a memref, making an independent copy of the element data. The result value is a tensor whose shape and element type match the memref operand. Produce a value of tensor<4x?xf32> type. %12 = tensor_load %10 : memref<4x?xf32, #layout, memspace0> }]; let arguments = (ins AnyMemRef:$memref); let results = (outs AnyTensor:$result); // TensorLoadOp is fully verified by traits. 
let verifier = ?; let builders = [OpBuilder< "Builder *builder, OperationState &result, Value memref", [{ auto memrefType = memref.getType().cast(); auto resultType = RankedTensorType::get(memrefType.getShape(), memrefType.getElementType()); result.addOperands(memref); result.addTypes(resultType); }]>]; let extraClassDeclaration = [{ /// The result of a tensor_load is always a tensor. TensorType getType() { return getResult().getType().cast(); } }]; let assemblyFormat = "$memref attr-dict `:` type($memref)"; } def TensorStoreOp : Std_Op<"tensor_store", [SameOperandsShape, SameOperandsElementType, TypesMatchWith<"type of 'value' matches tensor equivalent of 'memref'", "memref", "tensor", "getTensorTypeFromMemRefType($_self)">]> { let summary = "tensor store operation"; let description = [{ The "tensor_store" operation stores the contents of a tensor into a memref. The first operand is a value of tensor type, the second operand is a value of memref type. The shapes and element types of these must match, and are specified by the memref type. Example: %9 = dim %8, 1 : tensor<4x?xf32> %10 = alloc(%9) : memref<4x?xf32, #layout, memspace0> tensor_store %8, %10 : memref<4x?xf32, #layout, memspace0> }]; let arguments = (ins AnyTensor:$tensor, AnyMemRef:$memref); // TensorStoreOp is fully verified by traits. let verifier = ?; let assemblyFormat = "$tensor `,` $memref attr-dict `:` type($memref)"; } def TruncateIOp : Std_Op<"trunci", [NoSideEffect, SameOperandsAndResultShape]> { let summary = "integer truncation operation"; let description = [{ The integer truncation operation takes an integer input of width M and an integer destination type of width N. The destination bit-width must be smaller than the input bit-width (N < M). The top-most (N - M) bits of the input are discarded. 
%1 = constant 21 : i5 // %1 is 0b10101 %2 = trunci %1 : i5 to i4 // %2 is 0b0101 %3 = trunci %1 : i5 to i3 // %3 is 0b101 %5 = trunci %0 : vector<2 x i32> to vector<2 x i16> }]; let arguments = (ins SignlessIntegerLike:$value); let results = (outs SignlessIntegerLike); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value value, Type destType", [{ result.addOperands(value); result.addTypes(destType); }]>]; let parser = [{ return impl::parseCastOp(parser, result); }]; let printer = [{ return printStandardCastOp(this->getOperation(), p); }]; } def ViewOp : Std_Op<"view", [NoSideEffect]> { let summary = "memref view operation"; let description = [{ The "view" operation converts a 1-D memref with i8 element type, to an N-D memref with arbitrary element type. In addition, the ViewOp supports the following arguments: *) A single dynamic offset operand can be specified which represents a a dynamic offset within the base 1-D memref at which to create the resulting memref view. *) A dynamic size operand must be specified for each dynamic dimension in the resulting view memref type. // Allocate a flat 1D/i8 memref. %0 = alloc() : memref<2048xi8> // ViewOp with static offset and sizes. %1 = view %0[][] : memref<2048xi8> to memref<64x4xf32> // ViewOp with dynamic offset and one dynamic size. %2 = view %0[%offset_1024][%size0] : memref<2048xi8> to memref (d0 * 4 + d1 + s0)> // ViewOp creating 3D shape where two of the dim sizes are dynamic. // *) The dynamic offset specified in the ViewOp is applied to the // base 1-D memref, and is represented by the symbol 's0' in the // layout map of the ViewOp result memref type. // *) The dynamic size for the second dimension induces a dynamic // stride for the first dimension, which is represented by the // symbol 's1' in the layout map of the ViewOp result memref type. // Note that this dynamic stride will be computed from the view // shape and dynamic sizes. 
%3 = view %0[%offset_1024][%size0, %size1] : memref<2048xi8> to memref (d0 * s1 + d1 * 4 + d2 + s0)> }]; let arguments = (ins MemRefRankOf<[I8], [1]>:$source, Variadic:$operands); let results = (outs AnyMemRef); let extraClassDeclaration = [{ /// The result of a view is always a memref. MemRefType getType() { return getResult().getType().cast(); } /// Returns the dynamic offset for this view operation if specified. /// Returns nullptr if no dynamic offset was specified. Value getDynamicOffset(); /// Returns the starting operand list position of the dynamic size operands. unsigned getDynamicSizesOperandStart() { return getDynamicOffset() == nullptr ? 1 : 2; } /// Returns the dynamic sizes for this view operation. operand_range getDynamicSizes() { return {operand_begin() + getDynamicSizesOperandStart(), operand_end()}; } }]; let hasCanonicalizer = 1; } def XOrOp : IntArithmeticOp<"xor", [Commutative]> { let summary = "integer binary xor"; let hasFolder = 1; } def ZeroExtendIOp : Std_Op<"zexti", [NoSideEffect, SameOperandsAndResultShape]> { let summary = "integer zero extension operation"; let description = [{ The integer zero extension operation takes an integer input of width M and an integer destination type of width N. The destination bit-width must be larger than the input bit-width (N > M). The top-most (N - M) bits of the output are filled with zeros. 
%1 = constant 5 : i3 // %1 is 0b101 %2 = zexti %1 : i3 to i6 // %2 is 0b000101 %3 = constant 2 : i3 // %3 is 0b010 %4 = zexti %3 : i3 to i6 // %4 is 0b000010 %5 = zexti %0 : vector<2 x i32> to vector<2 x i64> }]; let arguments = (ins SignlessIntegerLike:$value); let results = (outs SignlessIntegerLike); let builders = [OpBuilder< "Builder *builder, OperationState &result, Value value, Type destType", [{ result.addOperands(value); result.addTypes(destType); }]>]; let parser = [{ return impl::parseCastOp(parser, result); }]; let printer = [{ return printStandardCastOp(this->getOperation(), p); }]; } def AssumeAlignmentOp : Std_Op<"assume_alignment"> { let summary = "assertion that gives alignment information to the input memref"; let description = [{ The assume alignment operation takes a memref and a integer of alignment value, and internally annotates the buffer with the given alignment. If the buffer isn't aligned to the given alignment, the behavior is undefined. This operation doesn't affect the semantics of a correct program. It's for optimization only, and the optimization is best-effort. }]; let arguments = (ins AnyMemRef:$memref, PositiveI32Attr:$alignment); let results = (outs); let assemblyFormat = "$memref `,` $alignment attr-dict `:` type($memref)"; } #endif // STANDARD_OPS diff --git a/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp index 061d4f9bd095..b5b415e7705d 100644 --- a/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/ConvertStandardToLLVM.cpp @@ -1,2830 +1,2998 @@ //===- ConvertStandardToLLVM.cpp - Standard to LLVM dialect conversion-----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file implements a pass to convert MLIR standard and builtin dialects // into the LLVM IR dialect. // //===----------------------------------------------------------------------===// #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h" #include "mlir/ADT/TypeSwitch.h" #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/StandardOps/IR/Ops.h" #include "mlir/IR/Builders.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Module.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/Functional.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/Passes.h" #include "mlir/Transforms/Utils.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Type.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FormatVariadic.h" using namespace mlir; #define PASS_NAME "convert-std-to-llvm" // Extract an LLVM IR type from the LLVM IR dialect type. static LLVM::LLVMType unwrap(Type type) { if (!type) return nullptr; auto *mlirContext = type.getContext(); auto wrappedLLVMType = type.dyn_cast(); if (!wrappedLLVMType) emitError(UnknownLoc::get(mlirContext), "conversion resulted in a non-LLVM type"); return wrappedLLVMType; } /// Initialize customization to default callbacks. LLVMTypeConverterCustomization::LLVMTypeConverterCustomization() { funcArgConverter = structFuncArgTypeConverter; } /// Callback to convert function argument types. It converts a MemRef function /// argument to a list of non-aggregate types containing descriptor /// information, and an UnrankedmemRef function argument to a list containing /// the rank and a pointer to a descriptor struct. 
LogicalResult mlir::structFuncArgTypeConverter(LLVMTypeConverter &converter, Type type, SmallVectorImpl &result) { if (auto memref = type.dyn_cast()) { auto converted = converter.convertMemRefSignature(memref); if (converted.empty()) return failure(); result.append(converted.begin(), converted.end()); return success(); } if (type.isa()) { auto converted = converter.convertUnrankedMemRefSignature(); if (converted.empty()) return failure(); result.append(converted.begin(), converted.end()); return success(); } auto converted = converter.convertType(type); if (!converted) return failure(); result.push_back(converted); return success(); } /// Convert a MemRef type to a bare pointer to the MemRef element type. static Type convertMemRefTypeToBarePtr(LLVMTypeConverter &converter, MemRefType type) { int64_t offset; SmallVector strides; if (failed(getStridesAndOffset(type, strides, offset))) return {}; LLVM::LLVMType elementType = unwrap(converter.convertType(type.getElementType())); if (!elementType) return {}; return elementType.getPointerTo(type.getMemorySpace()); } /// Callback to convert function argument types. It converts MemRef function /// arguments to bare pointers to the MemRef element type. LogicalResult mlir::barePtrFuncArgTypeConverter(LLVMTypeConverter &converter, Type type, SmallVectorImpl &result) { // TODO: Add support for unranked memref. if (auto memrefTy = type.dyn_cast()) { auto llvmTy = convertMemRefTypeToBarePtr(converter, memrefTy); if (!llvmTy) return failure(); result.push_back(llvmTy); return success(); } auto llvmTy = converter.convertType(type); if (!llvmTy) return failure(); result.push_back(llvmTy); return success(); } /// Create an LLVMTypeConverter using default LLVMTypeConverterCustomization. LLVMTypeConverter::LLVMTypeConverter(MLIRContext *ctx) : LLVMTypeConverter(ctx, LLVMTypeConverterCustomization()) {} /// Create an LLVMTypeConverter using 'custom' customizations. 
LLVMTypeConverter::LLVMTypeConverter( MLIRContext *ctx, const LLVMTypeConverterCustomization &customs) : llvmDialect(ctx->getRegisteredDialect()), customizations(customs) { assert(llvmDialect && "LLVM IR dialect is not registered"); module = &llvmDialect->getLLVMModule(); // Register conversions for the standard types. addConversion([&](FloatType type) { return convertFloatType(type); }); addConversion([&](FunctionType type) { return convertFunctionType(type); }); addConversion([&](IndexType type) { return convertIndexType(type); }); addConversion([&](IntegerType type) { return convertIntegerType(type); }); addConversion([&](MemRefType type) { return convertMemRefType(type); }); addConversion( [&](UnrankedMemRefType type) { return convertUnrankedMemRefType(type); }); addConversion([&](VectorType type) { return convertVectorType(type); }); // LLVMType is legal, so add a pass-through conversion. addConversion([](LLVM::LLVMType type) { return type; }); } /// Get the LLVM context. llvm::LLVMContext &LLVMTypeConverter::getLLVMContext() { return module->getContext(); } LLVM::LLVMType LLVMTypeConverter::getIndexType() { return LLVM::LLVMType::getIntNTy( llvmDialect, module->getDataLayout().getPointerSizeInBits()); } Type LLVMTypeConverter::convertIndexType(IndexType type) { return getIndexType(); } Type LLVMTypeConverter::convertIntegerType(IntegerType type) { return LLVM::LLVMType::getIntNTy(llvmDialect, type.getWidth()); } Type LLVMTypeConverter::convertFloatType(FloatType type) { switch (type.getKind()) { case mlir::StandardTypes::F32: return LLVM::LLVMType::getFloatTy(llvmDialect); case mlir::StandardTypes::F64: return LLVM::LLVMType::getDoubleTy(llvmDialect); case mlir::StandardTypes::F16: return LLVM::LLVMType::getHalfTy(llvmDialect); case mlir::StandardTypes::BF16: { auto *mlirContext = llvmDialect->getContext(); return emitError(UnknownLoc::get(mlirContext), "unsupported type: BF16"), Type(); } default: llvm_unreachable("non-float type in convertFloatType"); } } 
// Except for signatures, MLIR function types are converted into LLVM // pointer-to-function types. Type LLVMTypeConverter::convertFunctionType(FunctionType type) { SignatureConversion conversion(type.getNumInputs()); LLVM::LLVMType converted = convertFunctionSignature(type, /*isVariadic=*/false, conversion); return converted.getPointerTo(); } /// In signatures, MemRef descriptors are expanded into lists of non-aggregate /// values. SmallVector LLVMTypeConverter::convertMemRefSignature(MemRefType type) { SmallVector results; assert(isStrided(type) && "Non-strided layout maps must have been normalized away"); LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; auto indexTy = getIndexType(); results.insert(results.begin(), 2, elementType.getPointerTo(type.getMemorySpace())); results.push_back(indexTy); auto rank = type.getRank(); results.insert(results.end(), 2 * rank, indexTy); return results; } /// In signatures, unranked MemRef descriptors are expanded into a pair "rank, /// pointer to descriptor". SmallVector LLVMTypeConverter::convertUnrankedMemRefSignature() { return {getIndexType(), LLVM::LLVMType::getInt8PtrTy(llvmDialect)}; } // Function types are converted to LLVM Function types by recursively converting // argument and result types. If MLIR Function has zero results, the LLVM // Function has one VoidType result. If MLIR Function has more than one result, // they are into an LLVM StructType in their order of appearance. LLVM::LLVMType LLVMTypeConverter::convertFunctionSignature( FunctionType type, bool isVariadic, LLVMTypeConverter::SignatureConversion &result) { // Convert argument types one by one and check for errors. 
for (auto &en : llvm::enumerate(type.getInputs())) { Type type = en.value(); SmallVector converted; if (failed(customizations.funcArgConverter(*this, type, converted))) return {}; result.addInputs(en.index(), converted); } SmallVector argTypes; argTypes.reserve(llvm::size(result.getConvertedTypes())); for (Type type : result.getConvertedTypes()) argTypes.push_back(unwrap(type)); // If function does not return anything, create the void result type, // if it returns on element, convert it, otherwise pack the result types into // a struct. LLVM::LLVMType resultType = type.getNumResults() == 0 ? LLVM::LLVMType::getVoidTy(llvmDialect) : unwrap(packFunctionResults(type.getResults())); if (!resultType) return {}; return LLVM::LLVMType::getFunctionTy(resultType, argTypes, isVariadic); } /// Converts the function type to a C-compatible format, in particular using /// pointers to memref descriptors for arguments. LLVM::LLVMType LLVMTypeConverter::convertFunctionTypeCWrapper(FunctionType type) { SmallVector inputs; for (Type t : type.getInputs()) { auto converted = convertType(t).dyn_cast_or_null(); if (!converted) return {}; if (t.isa() || t.isa()) converted = converted.getPointerTo(); inputs.push_back(converted); } LLVM::LLVMType resultType = type.getNumResults() == 0 ? LLVM::LLVMType::getVoidTy(llvmDialect) : unwrap(packFunctionResults(type.getResults())); if (!resultType) return {}; return LLVM::LLVMType::getFunctionTy(resultType, inputs, false); } /// Creates descriptor structs from individual values constituting them. 
Operation *LLVMTypeConverter::materializeConversion(PatternRewriter &rewriter,
                                                    Type type,
                                                    ArrayRef<Value> values,
                                                    Location loc) {
  // Unranked memrefs are packed into an unranked descriptor.
  if (auto unrankedMemRefType = type.dyn_cast<UnrankedMemRefType>())
    return UnrankedMemRefDescriptor::pack(rewriter, loc, *this,
                                          unrankedMemRefType, values)
        .getDefiningOp();

  // Anything else must be a ranked memref.
  auto memRefType = type.dyn_cast<MemRefType>();
  assert(memRefType && "1->N conversion is only supported for memrefs");
  return MemRefDescriptor::pack(rewriter, loc, *this, memRefType, values)
      .getDefiningOp();
}

// Convert a MemRef to an LLVM type. The result is a MemRef descriptor which
// contains:
//   1. the pointer to the data buffer, followed by
//   2.  a lowered `index`-type integer containing the distance between the
//   beginning of the buffer and the first element to be accessed through the
//   view, followed by
//   3. an array containing as many `index`-type integers as the rank of the
//   MemRef: the array represents the size, in number of elements, of the memref
//   along the given dimension. For constant MemRef dimensions, the
//   corresponding size entry is a constant whose runtime value must match the
//   static value, followed by
//   4. a second array containing as many `index`-type integers as the rank of
//   the MemRef: the second array represents the "stride" (in tensor abstraction
//   sense), i.e. the number of consecutive elements of the underlying buffer.
//   TODO(ntv, zinenko): add assertions for the static cases.
// // template // struct { // Elem *allocatedPtr; // Elem *alignedPtr; // int64_t offset; // int64_t sizes[Rank]; // omitted when rank == 0 // int64_t strides[Rank]; // omitted when rank == 0 // }; static constexpr unsigned kAllocatedPtrPosInMemRefDescriptor = 0; static constexpr unsigned kAlignedPtrPosInMemRefDescriptor = 1; static constexpr unsigned kOffsetPosInMemRefDescriptor = 2; static constexpr unsigned kSizePosInMemRefDescriptor = 3; static constexpr unsigned kStridePosInMemRefDescriptor = 4; Type LLVMTypeConverter::convertMemRefType(MemRefType type) { int64_t offset; SmallVector strides; bool strideSuccess = succeeded(getStridesAndOffset(type, strides, offset)); assert(strideSuccess && "Non-strided layout maps must have been normalized away"); (void)strideSuccess; LLVM::LLVMType elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; auto ptrTy = elementType.getPointerTo(type.getMemorySpace()); auto indexTy = getIndexType(); auto rank = type.getRank(); if (rank > 0) { auto arrayTy = LLVM::LLVMType::getArrayTy(indexTy, type.getRank()); return LLVM::LLVMType::getStructTy(ptrTy, ptrTy, indexTy, arrayTy, arrayTy); } return LLVM::LLVMType::getStructTy(ptrTy, ptrTy, indexTy); } // Converts UnrankedMemRefType to LLVMType. The result is a descriptor which // contains: // 1. int64_t rank, the dynamic rank of this MemRef // 2. void* ptr, pointer to the static ranked MemRef descriptor. This will be // stack allocated (alloca) copy of a MemRef descriptor that got casted to // be unranked. static constexpr unsigned kRankInUnrankedMemRefDescriptor = 0; static constexpr unsigned kPtrInUnrankedMemRefDescriptor = 1; Type LLVMTypeConverter::convertUnrankedMemRefType(UnrankedMemRefType type) { auto rankTy = LLVM::LLVMType::getInt64Ty(llvmDialect); auto ptrTy = LLVM::LLVMType::getInt8PtrTy(llvmDialect); return LLVM::LLVMType::getStructTy(rankTy, ptrTy); } // Convert an n-D vector type to an LLVM vector type via (n-1)-D array type when // n > 1. 
// For example, `vector<4 x f32>` converts to `!llvm.type<"<4 x float>">` and // `vector<4 x 8 x 16 f32>` converts to `!llvm<"[4 x [8 x <16 x float>]]">`. Type LLVMTypeConverter::convertVectorType(VectorType type) { auto elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; auto vectorType = LLVM::LLVMType::getVectorTy(elementType, type.getShape().back()); auto shape = type.getShape(); for (int i = shape.size() - 2; i >= 0; --i) vectorType = LLVM::LLVMType::getArrayTy(vectorType, shape[i]); return vectorType; } ConvertToLLVMPattern::ConvertToLLVMPattern(StringRef rootOpName, MLIRContext *context, LLVMTypeConverter &typeConverter_, PatternBenefit benefit) : ConversionPattern(rootOpName, benefit, context), typeConverter(typeConverter_) {} /*============================================================================*/ /* StructBuilder implementation */ /*============================================================================*/ StructBuilder::StructBuilder(Value v) : value(v) { assert(value != nullptr && "value cannot be null"); structType = value.getType().dyn_cast(); assert(structType && "expected llvm type"); } Value StructBuilder::extractPtr(OpBuilder &builder, Location loc, unsigned pos) { Type type = structType.cast().getStructElementType(pos); return builder.create(loc, type, value, builder.getI64ArrayAttr(pos)); } void StructBuilder::setPtr(OpBuilder &builder, Location loc, unsigned pos, Value ptr) { value = builder.create(loc, structType, value, ptr, builder.getI64ArrayAttr(pos)); } /*============================================================================*/ /* MemRefDescriptor implementation */ /*============================================================================*/ /// Construct a helper for the given descriptor value. 
MemRefDescriptor::MemRefDescriptor(Value descriptor)
    : StructBuilder(descriptor) {
  assert(value != nullptr && "value cannot be null");
  // The index type is taken from the type of the descriptor's offset field.
  indexType = value.getType().cast<LLVM::LLVMType>().getStructElementType(
      kOffsetPosInMemRefDescriptor);
}

/// Builds IR creating an `undef` value of the descriptor type.
MemRefDescriptor MemRefDescriptor::undef(OpBuilder &builder, Location loc,
                                         Type descriptorType) {
  Value descriptor =
      builder.create<LLVM::UndefOp>(loc, descriptorType.cast<LLVM::LLVMType>());
  return MemRefDescriptor(descriptor);
}

/// Builds IR creating a MemRef descriptor that represents `type` and
/// populates it with static shape and stride information extracted from the
/// type. `memory` is used as both the allocated and the aligned pointer.
MemRefDescriptor
MemRefDescriptor::fromStaticShape(OpBuilder &builder, Location loc,
                                  LLVMTypeConverter &typeConverter,
                                  MemRefType type, Value memory) {
  assert(type.hasStaticShape() && "unexpected dynamic shape");

  // Extract all strides and offsets and verify they are static.
  int64_t offset;
  SmallVector<int64_t, 4> strides;
  auto result = getStridesAndOffset(type, strides, offset);
  (void)result;
  assert(succeeded(result) && "unexpected failure in stride computation");
  assert(offset != MemRefType::getDynamicStrideOrOffset() &&
         "expected static offset");
  assert(!llvm::is_contained(strides, MemRefType::getDynamicStrideOrOffset()) &&
         "expected static strides");

  auto convertedType = typeConverter.convertType(type);
  assert(convertedType && "unexpected failure in memref type conversion");

  auto descr = MemRefDescriptor::undef(builder, loc, convertedType);
  descr.setAllocatedPtr(builder, loc, memory);
  descr.setAlignedPtr(builder, loc, memory);
  descr.setConstantOffset(builder, loc, offset);

  // Fill in sizes and strides
  for (unsigned i = 0, e = type.getRank(); i != e; ++i) {
    descr.setConstantSize(builder, loc, i, type.getDimSize(i));
    descr.setConstantStride(builder, loc, i, strides[i]);
  }
  return descr;
}

/// Builds IR extracting the allocated pointer from the descriptor.
Value MemRefDescriptor::allocatedPtr(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kAllocatedPtrPosInMemRefDescriptor); } /// Builds IR inserting the allocated pointer into the descriptor. void MemRefDescriptor::setAllocatedPtr(OpBuilder &builder, Location loc, Value ptr) { setPtr(builder, loc, kAllocatedPtrPosInMemRefDescriptor, ptr); } /// Builds IR extracting the aligned pointer from the descriptor. Value MemRefDescriptor::alignedPtr(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kAlignedPtrPosInMemRefDescriptor); } /// Builds IR inserting the aligned pointer into the descriptor. void MemRefDescriptor::setAlignedPtr(OpBuilder &builder, Location loc, Value ptr) { setPtr(builder, loc, kAlignedPtrPosInMemRefDescriptor, ptr); } // Creates a constant Op producing a value of `resultType` from an index-typed // integer attribute. static Value createIndexAttrConstant(OpBuilder &builder, Location loc, Type resultType, int64_t value) { return builder.create( loc, resultType, builder.getIntegerAttr(builder.getIndexType(), value)); } /// Builds IR extracting the offset from the descriptor. Value MemRefDescriptor::offset(OpBuilder &builder, Location loc) { return builder.create( loc, indexType, value, builder.getI64ArrayAttr(kOffsetPosInMemRefDescriptor)); } /// Builds IR inserting the offset into the descriptor. void MemRefDescriptor::setOffset(OpBuilder &builder, Location loc, Value offset) { value = builder.create( loc, structType, value, offset, builder.getI64ArrayAttr(kOffsetPosInMemRefDescriptor)); } /// Builds IR inserting the offset into the descriptor. void MemRefDescriptor::setConstantOffset(OpBuilder &builder, Location loc, uint64_t offset) { setOffset(builder, loc, createIndexAttrConstant(builder, loc, indexType, offset)); } /// Builds IR extracting the pos-th size from the descriptor. 
Value MemRefDescriptor::size(OpBuilder &builder, Location loc, unsigned pos) { return builder.create( loc, indexType, value, builder.getI64ArrayAttr({kSizePosInMemRefDescriptor, pos})); } /// Builds IR inserting the pos-th size into the descriptor void MemRefDescriptor::setSize(OpBuilder &builder, Location loc, unsigned pos, Value size) { value = builder.create( loc, structType, value, size, builder.getI64ArrayAttr({kSizePosInMemRefDescriptor, pos})); } /// Builds IR inserting the pos-th size into the descriptor void MemRefDescriptor::setConstantSize(OpBuilder &builder, Location loc, unsigned pos, uint64_t size) { setSize(builder, loc, pos, createIndexAttrConstant(builder, loc, indexType, size)); } /// Builds IR extracting the pos-th size from the descriptor. Value MemRefDescriptor::stride(OpBuilder &builder, Location loc, unsigned pos) { return builder.create( loc, indexType, value, builder.getI64ArrayAttr({kStridePosInMemRefDescriptor, pos})); } /// Builds IR inserting the pos-th stride into the descriptor void MemRefDescriptor::setStride(OpBuilder &builder, Location loc, unsigned pos, Value stride) { value = builder.create( loc, structType, value, stride, builder.getI64ArrayAttr({kStridePosInMemRefDescriptor, pos})); } /// Builds IR inserting the pos-th stride into the descriptor void MemRefDescriptor::setConstantStride(OpBuilder &builder, Location loc, unsigned pos, uint64_t stride) { setStride(builder, loc, pos, createIndexAttrConstant(builder, loc, indexType, stride)); } LLVM::LLVMType MemRefDescriptor::getElementType() { return value.getType().cast().getStructElementType( kAlignedPtrPosInMemRefDescriptor); } /// Creates a MemRef descriptor structure from a list of individual values /// composing that descriptor, in the following order: /// - allocated pointer; /// - aligned pointer; /// - offset; /// - sizes; /// - shapes; /// where is the MemRef rank as provided in `type`. 
Value MemRefDescriptor::pack(OpBuilder &builder, Location loc, LLVMTypeConverter &converter, MemRefType type, ValueRange values) { Type llvmType = converter.convertType(type); auto d = MemRefDescriptor::undef(builder, loc, llvmType); d.setAllocatedPtr(builder, loc, values[kAllocatedPtrPosInMemRefDescriptor]); d.setAlignedPtr(builder, loc, values[kAlignedPtrPosInMemRefDescriptor]); d.setOffset(builder, loc, values[kOffsetPosInMemRefDescriptor]); int64_t rank = type.getRank(); for (unsigned i = 0; i < rank; ++i) { d.setSize(builder, loc, i, values[kSizePosInMemRefDescriptor + i]); d.setStride(builder, loc, i, values[kSizePosInMemRefDescriptor + rank + i]); } return d; } /// Builds IR extracting individual elements of a MemRef descriptor structure /// and returning them as `results` list. void MemRefDescriptor::unpack(OpBuilder &builder, Location loc, Value packed, MemRefType type, SmallVectorImpl &results) { int64_t rank = type.getRank(); results.reserve(results.size() + getNumUnpackedValues(type)); MemRefDescriptor d(packed); results.push_back(d.allocatedPtr(builder, loc)); results.push_back(d.alignedPtr(builder, loc)); results.push_back(d.offset(builder, loc)); for (int64_t i = 0; i < rank; ++i) results.push_back(d.size(builder, loc, i)); for (int64_t i = 0; i < rank; ++i) results.push_back(d.stride(builder, loc, i)); } /// Returns the number of non-aggregate values that would be produced by /// `unpack`. unsigned MemRefDescriptor::getNumUnpackedValues(MemRefType type) { // Two pointers, offset, sizes, shapes. return 3 + 2 * type.getRank(); } /*============================================================================*/ /* MemRefDescriptorView implementation. 
*/ /*============================================================================*/ MemRefDescriptorView::MemRefDescriptorView(ValueRange range) : rank((range.size() - kSizePosInMemRefDescriptor) / 2), elements(range) {} Value MemRefDescriptorView::allocatedPtr() { return elements[kAllocatedPtrPosInMemRefDescriptor]; } Value MemRefDescriptorView::alignedPtr() { return elements[kAlignedPtrPosInMemRefDescriptor]; } Value MemRefDescriptorView::offset() { return elements[kOffsetPosInMemRefDescriptor]; } Value MemRefDescriptorView::size(unsigned pos) { return elements[kSizePosInMemRefDescriptor + pos]; } Value MemRefDescriptorView::stride(unsigned pos) { return elements[kSizePosInMemRefDescriptor + rank + pos]; } /*============================================================================*/ /* UnrankedMemRefDescriptor implementation */ /*============================================================================*/ /// Construct a helper for the given descriptor value. UnrankedMemRefDescriptor::UnrankedMemRefDescriptor(Value descriptor) : StructBuilder(descriptor) {} /// Builds IR creating an `undef` value of the descriptor type. 
UnrankedMemRefDescriptor UnrankedMemRefDescriptor::undef(OpBuilder &builder, Location loc, Type descriptorType) { Value descriptor = builder.create(loc, descriptorType.cast()); return UnrankedMemRefDescriptor(descriptor); } Value UnrankedMemRefDescriptor::rank(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kRankInUnrankedMemRefDescriptor); } void UnrankedMemRefDescriptor::setRank(OpBuilder &builder, Location loc, Value v) { setPtr(builder, loc, kRankInUnrankedMemRefDescriptor, v); } Value UnrankedMemRefDescriptor::memRefDescPtr(OpBuilder &builder, Location loc) { return extractPtr(builder, loc, kPtrInUnrankedMemRefDescriptor); } void UnrankedMemRefDescriptor::setMemRefDescPtr(OpBuilder &builder, Location loc, Value v) { setPtr(builder, loc, kPtrInUnrankedMemRefDescriptor, v); } /// Builds IR populating an unranked MemRef descriptor structure from a list /// of individual constituent values in the following order: /// - rank of the memref; /// - pointer to the memref descriptor. Value UnrankedMemRefDescriptor::pack(OpBuilder &builder, Location loc, LLVMTypeConverter &converter, UnrankedMemRefType type, ValueRange values) { Type llvmType = converter.convertType(type); auto d = UnrankedMemRefDescriptor::undef(builder, loc, llvmType); d.setRank(builder, loc, values[kRankInUnrankedMemRefDescriptor]); d.setMemRefDescPtr(builder, loc, values[kPtrInUnrankedMemRefDescriptor]); return d; } /// Builds IR extracting individual elements that compose an unranked memref /// descriptor and returns them as `results` list. void UnrankedMemRefDescriptor::unpack(OpBuilder &builder, Location loc, Value packed, SmallVectorImpl &results) { UnrankedMemRefDescriptor d(packed); results.reserve(results.size() + 2); results.push_back(d.rank(builder, loc)); results.push_back(d.memRefDescPtr(builder, loc)); } namespace { // Base class for Standard to LLVM IR op conversions. Matches the Op type // provided as template argument. 
Carries a reference to the LLVM dialect in // case it is necessary for rewriters. template class LLVMLegalizationPattern : public ConvertToLLVMPattern { public: // Construct a conversion pattern. explicit LLVMLegalizationPattern(LLVM::LLVMDialect &dialect_, LLVMTypeConverter &typeConverter_) : ConvertToLLVMPattern(SourceOp::getOperationName(), dialect_.getContext(), typeConverter_), dialect(dialect_) {} // Get the LLVM IR dialect. LLVM::LLVMDialect &getDialect() const { return dialect; } // Get the LLVM context. llvm::LLVMContext &getContext() const { return dialect.getLLVMContext(); } // Get the LLVM module in which the types are constructed. llvm::Module &getModule() const { return dialect.getLLVMModule(); } // Get the MLIR type wrapping the LLVM integer type whose bit width is defined // by the pointer size used in the LLVM module. LLVM::LLVMType getIndexType() const { return LLVM::LLVMType::getIntNTy( &dialect, getModule().getDataLayout().getPointerSizeInBits()); } LLVM::LLVMType getVoidType() const { return LLVM::LLVMType::getVoidTy(&dialect); } // Get the MLIR type wrapping the LLVM i8* type. LLVM::LLVMType getVoidPtrType() const { return LLVM::LLVMType::getInt8PtrTy(&dialect); } // Create an LLVM IR pseudo-operation defining the given index constant. Value createIndexConstant(ConversionPatternRewriter &builder, Location loc, uint64_t value) const { return createIndexAttrConstant(builder, loc, getIndexType(), value); } protected: LLVM::LLVMDialect &dialect; }; /// Only retain those attributes that are not constructed by /// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out argument /// attributes. 
static void filterFuncAttributes(ArrayRef attrs, bool filterArgAttrs, SmallVectorImpl &result) { for (const auto &attr : attrs) { if (attr.first.is(SymbolTable::getSymbolAttrName()) || attr.first.is(impl::getTypeAttrName()) || attr.first.is("std.varargs") || (filterArgAttrs && impl::isArgAttrName(attr.first.strref()))) continue; result.push_back(attr); } } /// Creates an auxiliary function with pointer-to-memref-descriptor-struct /// arguments instead of unpacked arguments. This function can be called from C /// by passing a pointer to a C struct corresponding to a memref descriptor. /// Internally, the auxiliary function unpacks the descriptor into individual /// components and forwards them to `newFuncOp`. static void wrapForExternalCallers(OpBuilder &rewriter, Location loc, LLVMTypeConverter &typeConverter, FuncOp funcOp, LLVM::LLVMFuncOp newFuncOp) { auto type = funcOp.getType(); SmallVector attributes; filterFuncAttributes(funcOp.getAttrs(), /*filterArgAttrs=*/false, attributes); auto wrapperFuncOp = rewriter.create( loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(), typeConverter.convertFunctionTypeCWrapper(type), LLVM::Linkage::External, attributes); OpBuilder::InsertionGuard guard(rewriter); rewriter.setInsertionPointToStart(wrapperFuncOp.addEntryBlock()); SmallVector args; for (auto &en : llvm::enumerate(type.getInputs())) { Value arg = wrapperFuncOp.getArgument(en.index()); if (auto memrefType = en.value().dyn_cast()) { Value loaded = rewriter.create(loc, arg); MemRefDescriptor::unpack(rewriter, loc, loaded, memrefType, args); continue; } if (en.value().isa()) { Value loaded = rewriter.create(loc, arg); UnrankedMemRefDescriptor::unpack(rewriter, loc, loaded, args); continue; } args.push_back(wrapperFuncOp.getArgument(en.index())); } auto call = rewriter.create(loc, newFuncOp, args); rewriter.create(loc, call.getResults()); } /// Creates an auxiliary function with pointer-to-memref-descriptor-struct /// arguments instead of unpacked arguments. 
Creates a body for the (external) /// `newFuncOp` that allocates a memref descriptor on stack, packs the /// individual arguments into this descriptor and passes a pointer to it into /// the auxiliary function. This auxiliary external function is now compatible /// with functions defined in C using pointers to C structs corresponding to a /// memref descriptor. static void wrapExternalFunction(OpBuilder &builder, Location loc, LLVMTypeConverter &typeConverter, FuncOp funcOp, LLVM::LLVMFuncOp newFuncOp) { OpBuilder::InsertionGuard guard(builder); LLVM::LLVMType wrapperType = typeConverter.convertFunctionTypeCWrapper(funcOp.getType()); // This conversion can only fail if it could not convert one of the argument // types. But since it has been applies to a non-wrapper function before, it // should have failed earlier and not reach this point at all. assert(wrapperType && "unexpected type conversion failure"); SmallVector attributes; filterFuncAttributes(funcOp.getAttrs(), /*filterArgAttrs=*/false, attributes); // Create the auxiliary function. auto wrapperFunc = builder.create( loc, llvm::formatv("_mlir_ciface_{0}", funcOp.getName()).str(), wrapperType, LLVM::Linkage::External, attributes); builder.setInsertionPointToStart(newFuncOp.addEntryBlock()); // Get a ValueRange containing arguments. FunctionType type = funcOp.getType(); SmallVector args; args.reserve(type.getNumInputs()); ValueRange wrapperArgsRange(newFuncOp.getArguments()); // Iterate over the inputs of the original function and pack values into // memref descriptors if the original type is a memref. for (auto &en : llvm::enumerate(type.getInputs())) { Value arg; int numToDrop = 1; auto memRefType = en.value().dyn_cast(); auto unrankedMemRefType = en.value().dyn_cast(); if (memRefType || unrankedMemRefType) { numToDrop = memRefType ? MemRefDescriptor::getNumUnpackedValues(memRefType) : UnrankedMemRefDescriptor::getNumUnpackedValues(); Value packed = memRefType ? 
MemRefDescriptor::pack(builder, loc, typeConverter, memRefType, wrapperArgsRange.take_front(numToDrop)) : UnrankedMemRefDescriptor::pack( builder, loc, typeConverter, unrankedMemRefType, wrapperArgsRange.take_front(numToDrop)); auto ptrTy = packed.getType().cast().getPointerTo(); Value one = builder.create( loc, typeConverter.convertType(builder.getIndexType()), builder.getIntegerAttr(builder.getIndexType(), 1)); Value allocated = builder.create(loc, ptrTy, one, /*alignment=*/0); builder.create(loc, packed, allocated); arg = allocated; } else { arg = wrapperArgsRange[0]; } args.push_back(arg); wrapperArgsRange = wrapperArgsRange.drop_front(numToDrop); } assert(wrapperArgsRange.empty() && "did not map some of the arguments"); auto call = builder.create(loc, wrapperFunc, args); builder.create(loc, call.getResults()); } struct FuncOpConversionBase : public LLVMLegalizationPattern { protected: using LLVMLegalizationPattern::LLVMLegalizationPattern; using UnsignedTypePair = std::pair; // Gather the positions and types of memref-typed arguments in a given // FunctionType. void getMemRefArgIndicesAndTypes( FunctionType type, SmallVectorImpl &argsInfo) const { argsInfo.reserve(type.getNumInputs()); for (auto en : llvm::enumerate(type.getInputs())) { if (en.value().isa() || en.value().isa()) argsInfo.push_back({en.index(), en.value()}); } } // Convert input FuncOp to LLVMFuncOp by using the LLVMTypeConverter provided // to this legalization pattern. LLVM::LLVMFuncOp convertFuncOpToLLVMFuncOp(FuncOp funcOp, ConversionPatternRewriter &rewriter) const { // Convert the original function arguments. They are converted using the // LLVMTypeConverter provided to this legalization pattern. 
auto varargsAttr = funcOp.getAttrOfType("std.varargs"); TypeConverter::SignatureConversion result(funcOp.getNumArguments()); auto llvmType = typeConverter.convertFunctionSignature( funcOp.getType(), varargsAttr && varargsAttr.getValue(), result); // Propagate argument attributes to all converted arguments obtained after // converting a given original argument. SmallVector attributes; filterFuncAttributes(funcOp.getAttrs(), /*filterArgAttrs=*/true, attributes); for (unsigned i = 0, e = funcOp.getNumArguments(); i < e; ++i) { auto attr = impl::getArgAttrDict(funcOp, i); if (!attr) continue; auto mapping = result.getInputMapping(i); assert(mapping.hasValue() && "unexpected deletion of function argument"); SmallString<8> name; for (size_t j = 0; j < mapping->size; ++j) { impl::getArgAttrName(mapping->inputNo + j, name); attributes.push_back(rewriter.getNamedAttr(name, attr)); } } // Create an LLVM function, use external linkage by default until MLIR // functions have linkage. auto newFuncOp = rewriter.create( funcOp.getLoc(), funcOp.getName(), llvmType, LLVM::Linkage::External, attributes); rewriter.inlineRegionBefore(funcOp.getBody(), newFuncOp.getBody(), newFuncOp.end()); // Tell the rewriter to convert the region signature. rewriter.applySignatureConversion(&newFuncOp.getBody(), result); return newFuncOp; } }; /// FuncOp legalization pattern that converts MemRef arguments to pointers to /// MemRef descriptors (LLVM struct data types) containing all the MemRef type /// information. 
struct FuncOpConversion : public FuncOpConversionBase { FuncOpConversion(LLVM::LLVMDialect &dialect, LLVMTypeConverter &converter, bool emitCWrappers) : FuncOpConversionBase(dialect, converter), emitWrappers(emitCWrappers) {} PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto funcOp = cast(op); auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter); if (emitWrappers) { if (newFuncOp.isExternal()) wrapExternalFunction(rewriter, op->getLoc(), typeConverter, funcOp, newFuncOp); else wrapForExternalCallers(rewriter, op->getLoc(), typeConverter, funcOp, newFuncOp); } rewriter.eraseOp(op); return matchSuccess(); } private: /// If true, also create the adaptor functions having signatures compatible /// with those produced by clang. const bool emitWrappers; }; /// FuncOp legalization pattern that converts MemRef arguments to bare pointers /// to the MemRef element type. This will impact the calling convention and ABI. struct BarePtrFuncOpConversion : public FuncOpConversionBase { using FuncOpConversionBase::FuncOpConversionBase; PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto funcOp = cast(op); // Store the positions and type of memref-typed arguments so that we can // promote them to MemRef descriptor structs at the beginning of the // function. SmallVector promotedArgsInfo; getMemRefArgIndicesAndTypes(funcOp.getType(), promotedArgsInfo); auto newFuncOp = convertFuncOpToLLVMFuncOp(funcOp, rewriter); if (newFuncOp.getBody().empty()) { rewriter.eraseOp(op); return matchSuccess(); } // Promote bare pointers from MemRef arguments to a MemRef descriptor struct // at the beginning of the function so that all the MemRefs in the function // have a uniform representation. 
Block *firstBlock = &newFuncOp.getBody().front(); rewriter.setInsertionPoint(firstBlock, firstBlock->begin()); auto funcLoc = funcOp.getLoc(); for (const auto &argInfo : promotedArgsInfo) { // TODO: Add support for unranked MemRefs. if (auto memrefType = argInfo.second.dyn_cast()) { // Replace argument with a placeholder (undef), promote argument to a // MemRef descriptor and replace placeholder with the last instruction // of the MemRef descriptor. The placeholder is needed to avoid // replacing argument uses in the MemRef descriptor instructions. BlockArgument arg = firstBlock->getArgument(argInfo.first); Value placeHolder = rewriter.create(funcLoc, arg.getType()); rewriter.replaceUsesOfBlockArgument(arg, placeHolder); auto desc = MemRefDescriptor::fromStaticShape( rewriter, funcLoc, typeConverter, memrefType, arg); rewriter.replaceOp(placeHolder.getDefiningOp(), {desc}); } } rewriter.eraseOp(op); return matchSuccess(); } }; //////////////// Support for Lowering operations on n-D vectors //////////////// namespace { // Helper struct to "unroll" operations on n-D vectors in terms of operations on // 1-D LLVM vectors. struct NDVectorTypeInfo { // LLVM array struct which encodes n-D vectors. LLVM::LLVMType llvmArrayTy; // LLVM vector type which encodes the inner 1-D vector type. LLVM::LLVMType llvmVectorTy; // Multiplicity of llvmArrayTy to llvmVectorTy. SmallVector arraySizes; }; } // namespace // For >1-D vector types, extracts the necessary information to iterate over all // 1-D subvectors in the underlying llrepresentation of the n-D vector // Iterates on the llvm array type until we hit a non-array type (which is // asserted to be an llvm vector type). 
static NDVectorTypeInfo extractNDVectorTypeInfo(VectorType vectorType, LLVMTypeConverter &converter) { assert(vectorType.getRank() > 1 && "expected >1D vector type"); NDVectorTypeInfo info; info.llvmArrayTy = converter.convertType(vectorType).dyn_cast(); if (!info.llvmArrayTy) return info; info.arraySizes.reserve(vectorType.getRank() - 1); auto llvmTy = info.llvmArrayTy; while (llvmTy.isArrayTy()) { info.arraySizes.push_back(llvmTy.getArrayNumElements()); llvmTy = llvmTy.getArrayElementType(); } if (!llvmTy.isVectorTy()) return info; info.llvmVectorTy = llvmTy; return info; } // Express `linearIndex` in terms of coordinates of `basis`. // Returns the empty vector when linearIndex is out of the range [0, P] where // P is the product of all the basis coordinates. // // Prerequisites: // Basis is an array of nonnegative integers (signed type inherited from // vector shape type). static SmallVector getCoordinates(ArrayRef basis, unsigned linearIndex) { SmallVector res; res.reserve(basis.size()); for (unsigned basisElement : llvm::reverse(basis)) { res.push_back(linearIndex % basisElement); linearIndex = linearIndex / basisElement; } if (linearIndex > 0) return {}; std::reverse(res.begin(), res.end()); return res; } // Iterate of linear index, convert to coords space and insert splatted 1-D // vector in each position. template void nDVectorIterate(const NDVectorTypeInfo &info, OpBuilder &builder, Lambda fun) { unsigned ub = 1; for (auto s : info.arraySizes) ub *= s; for (unsigned linearIndex = 0; linearIndex < ub; ++linearIndex) { auto coords = getCoordinates(info.arraySizes, linearIndex); // Linear index is out of bounds, we are done. if (coords.empty()) break; assert(coords.size() == info.arraySizes.size()); auto position = builder.getI64ArrayAttr(coords); fun(position); } } ////////////// End Support for Lowering operations on n-D vectors ////////////// // Basic lowering implementation for one-to-one rewriting from Standard Ops to // LLVM Dialect Ops. 
template struct OneToOneLLVMOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; using Super = OneToOneLLVMOpLowering; // Convert the type of the result to an LLVM type, pass operands as is, // preserve attributes. PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { unsigned numResults = op->getNumResults(); Type packedType; if (numResults != 0) { packedType = this->typeConverter.packFunctionResults(op->getResultTypes()); if (!packedType) return this->matchFailure(); } auto newOp = rewriter.create(op->getLoc(), packedType, operands, op->getAttrs()); // If the operation produced 0 or 1 result, return them immediately. if (numResults == 0) return rewriter.eraseOp(op), this->matchSuccess(); if (numResults == 1) return rewriter.replaceOp(op, newOp.getOperation()->getResult(0)), this->matchSuccess(); // Otherwise, it had been converted to an operation producing a structure. // Extract individual results from the structure and return them as list. SmallVector results; results.reserve(numResults); for (unsigned i = 0; i < numResults; ++i) { auto type = this->typeConverter.convertType(op->getResult(i).getType()); results.push_back(rewriter.create( op->getLoc(), type, newOp.getOperation()->getResult(0), rewriter.getI64ArrayAttr(i))); } rewriter.replaceOp(op, results); return this->matchSuccess(); } }; -template struct OpCountValidator { +template +struct OpCountValidator { static_assert( std::is_base_of< typename OpTrait::NOperands::template Impl, SourceOp>::value, "wrong operand count"); }; -template struct OpCountValidator { +template +struct OpCountValidator { static_assert(std::is_base_of, SourceOp>::value, "expected a single operand"); }; -template void ValidateOpCount() { +template +void ValidateOpCount() { OpCountValidator(); } // Basic lowering implementation for rewriting from Standard Ops to LLVM Dialect // Ops for N-ary ops with one result. 
This supports higher-dimensional vector // types. template struct NaryOpLLVMOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; using Super = NaryOpLLVMOpLowering; // Convert the type of the result to an LLVM type, pass operands as is, // preserve attributes. PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { ValidateOpCount(); static_assert( std::is_base_of, SourceOp>::value, "expected single result op"); static_assert(std::is_base_of, SourceOp>::value, "expected same operands and result type"); // Cannot convert ops if their operands are not of LLVM type. for (Value operand : operands) { if (!operand || !operand.getType().isa()) return this->matchFailure(); } auto loc = op->getLoc(); auto llvmArrayTy = operands[0].getType().cast(); if (!llvmArrayTy.isArrayTy()) { auto newOp = rewriter.create( op->getLoc(), operands[0].getType(), operands, op->getAttrs()); rewriter.replaceOp(op, newOp.getResult()); return this->matchSuccess(); } auto vectorType = op->getResult(0).getType().dyn_cast(); if (!vectorType) return this->matchFailure(); auto vectorTypeInfo = extractNDVectorTypeInfo(vectorType, this->typeConverter); auto llvmVectorTy = vectorTypeInfo.llvmVectorTy; if (!llvmVectorTy || llvmArrayTy != vectorTypeInfo.llvmArrayTy) return this->matchFailure(); Value desc = rewriter.create(loc, llvmArrayTy); nDVectorIterate(vectorTypeInfo, rewriter, [&](ArrayAttr position) { // For this unrolled `position` corresponding to the `linearIndex`^th // element, extract operand vectors SmallVector extractedOperands; for (unsigned i = 0; i < OpCount; ++i) { extractedOperands.push_back(rewriter.create( loc, llvmVectorTy, operands[i], position)); } Value newVal = rewriter.create( loc, llvmVectorTy, extractedOperands, op->getAttrs()); desc = rewriter.create(loc, llvmArrayTy, desc, newVal, position); }); rewriter.replaceOp(op, desc); return this->matchSuccess(); } }; 
template using UnaryOpLLVMOpLowering = NaryOpLLVMOpLowering; template using BinaryOpLLVMOpLowering = NaryOpLLVMOpLowering; // Specific lowerings. // FIXME: this should be tablegen'ed. struct AbsFOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct CeilFOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct CosOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct ExpOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct LogOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct Log10OpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct Log2OpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct NegFOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct AddIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct SubIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct MulIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct SignedDivIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct SqrtOpLowering : public UnaryOpLLVMOpLowering { using Super::Super; }; struct UnsignedDivIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct SignedRemIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct UnsignedRemIOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct AndOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct OrOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct XOrOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct AddFOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct SubFOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct MulFOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct DivFOpLowering : public BinaryOpLLVMOpLowering { using 
Super::Super; }; struct RemFOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct CopySignOpLowering : public BinaryOpLLVMOpLowering { using Super::Super; }; struct SelectOpLowering : public OneToOneLLVMOpLowering { using Super::Super; }; struct ConstLLVMOpLowering : public OneToOneLLVMOpLowering { using Super::Super; }; struct ShiftLeftOpLowering : public OneToOneLLVMOpLowering { using Super::Super; }; struct SignedShiftRightOpLowering : public OneToOneLLVMOpLowering { using Super::Super; }; struct UnsignedShiftRightOpLowering : public OneToOneLLVMOpLowering { using Super::Super; }; // Check if the MemRefType `type` is supported by the lowering. We currently // only support memrefs with identity maps. static bool isSupportedMemRefType(MemRefType type) { return type.getAffineMaps().empty() || llvm::all_of(type.getAffineMaps(), [](AffineMap map) { return map.isIdentity(); }); } // An `alloc` is converted into a definition of a memref descriptor value and // a call to `malloc` to allocate the underlying data buffer. The memref // descriptor is of the LLVM structure type where: // 1. the first element is a pointer to the allocated (typed) data buffer, // 2. the second element is a pointer to the (typed) payload, aligned to the // specified alignment, // 3. the remaining elements serve to store all the sizes and strides of the // memref using LLVM-converted `index` type. // // Alignment is obtained by allocating `alignment - 1` more bytes than requested // and shifting the aligned pointer relative to the allocated memory. If // alignment is unspecified, the two pointers are equal. 
struct AllocOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; AllocOpLowering(LLVM::LLVMDialect &dialect_, LLVMTypeConverter &converter, bool useAlloca = false) : LLVMLegalizationPattern(dialect_, converter), useAlloca(useAlloca) {} PatternMatchResult match(Operation *op) const override { MemRefType type = cast(op).getType(); if (isSupportedMemRefType(type)) return matchSuccess(); int64_t offset; SmallVector strides; auto successStrides = getStridesAndOffset(type, strides, offset); if (failed(successStrides)) return matchFailure(); // Dynamic strides are ok if they can be deduced from dynamic sizes (which // is guaranteed when succeeded(successStrides)). Dynamic offset however can // never be alloc'ed. if (offset == MemRefType::getDynamicStrideOrOffset()) return matchFailure(); return matchSuccess(); } void rewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto loc = op->getLoc(); auto allocOp = cast(op); MemRefType type = allocOp.getType(); // Get actual sizes of the memref as values: static sizes are constant // values and dynamic sizes are passed to 'alloc' as operands. In case of // zero-dimensional memref, assume a scalar (size 1). SmallVector sizes; sizes.reserve(type.getRank()); unsigned i = 0; for (int64_t s : type.getShape()) sizes.push_back(s == -1 ? operands[i++] : createIndexConstant(rewriter, loc, s)); if (sizes.empty()) sizes.push_back(createIndexConstant(rewriter, loc, 1)); // Compute the total number of memref elements. Value cumulativeSize = sizes.front(); for (unsigned i = 1, e = sizes.size(); i < e; ++i) cumulativeSize = rewriter.create( loc, getIndexType(), ArrayRef{cumulativeSize, sizes[i]}); // Compute the size of an individual element. This emits the MLIR equivalent // of the following sizeof(...) 
implementation in LLVM IR: // %0 = getelementptr %elementType* null, %indexType 1 // %1 = ptrtoint %elementType* %0 to %indexType // which is a common pattern of getting the size of a type in bytes. auto elementType = type.getElementType(); auto convertedPtrType = typeConverter.convertType(elementType) .cast() .getPointerTo(); auto nullPtr = rewriter.create(loc, convertedPtrType); auto one = createIndexConstant(rewriter, loc, 1); auto gep = rewriter.create(loc, convertedPtrType, ArrayRef{nullPtr, one}); auto elementSize = rewriter.create(loc, getIndexType(), gep); cumulativeSize = rewriter.create( loc, getIndexType(), ArrayRef{cumulativeSize, elementSize}); // Allocate the underlying buffer and store a pointer to it in the MemRef // descriptor. Value allocated = nullptr; int alignment = 0; Value alignmentValue = nullptr; if (auto alignAttr = allocOp.alignment()) alignment = alignAttr.getValue().getSExtValue(); if (useAlloca) { allocated = rewriter.create(loc, getVoidPtrType(), cumulativeSize, alignment); } else { // Insert the `malloc` declaration if it is not already present. 
auto module = op->getParentOfType(); auto mallocFunc = module.lookupSymbol("malloc"); if (!mallocFunc) { OpBuilder moduleBuilder( op->getParentOfType().getBodyRegion()); mallocFunc = moduleBuilder.create( rewriter.getUnknownLoc(), "malloc", LLVM::LLVMType::getFunctionTy(getVoidPtrType(), getIndexType(), /*isVarArg=*/false)); } if (alignment != 0) { alignmentValue = createIndexConstant(rewriter, loc, alignment); cumulativeSize = rewriter.create( loc, rewriter.create(loc, cumulativeSize, alignmentValue), one); } allocated = rewriter .create( loc, getVoidPtrType(), rewriter.getSymbolRefAttr(mallocFunc), cumulativeSize) .getResult(0); } auto structElementType = typeConverter.convertType(elementType); auto elementPtrType = structElementType.cast().getPointerTo( type.getMemorySpace()); Value bitcastAllocated = rewriter.create( loc, elementPtrType, ArrayRef(allocated)); int64_t offset; SmallVector strides; auto successStrides = getStridesAndOffset(type, strides, offset); assert(succeeded(successStrides) && "unexpected non-strided memref"); (void)successStrides; assert(offset != MemRefType::getDynamicStrideOrOffset() && "unexpected dynamic offset"); // 0-D memref corner case: they have size 1 ... assert(((type.getRank() == 0 && strides.empty() && sizes.size() == 1) || (strides.size() == sizes.size())) && "unexpected number of strides"); // Create the MemRef descriptor. auto structType = typeConverter.convertType(type); auto memRefDescriptor = MemRefDescriptor::undef(rewriter, loc, structType); // Field 1: Allocated pointer, used for malloc/free. memRefDescriptor.setAllocatedPtr(rewriter, loc, bitcastAllocated); // Field 2: Actual aligned pointer to payload. 
Value bitcastAligned = bitcastAllocated; if (!useAlloca && alignment != 0) { assert(alignmentValue); // offset = (align - (ptr % align))% align Value intVal = rewriter.create( loc, this->getIndexType(), allocated); Value ptrModAlign = rewriter.create(loc, intVal, alignmentValue); Value subbed = rewriter.create(loc, alignmentValue, ptrModAlign); Value offset = rewriter.create(loc, subbed, alignmentValue); Value aligned = rewriter.create(loc, allocated.getType(), allocated, offset); bitcastAligned = rewriter.create( loc, elementPtrType, ArrayRef(aligned)); } memRefDescriptor.setAlignedPtr(rewriter, loc, bitcastAligned); // Field 3: Offset in aligned pointer. memRefDescriptor.setOffset(rewriter, loc, createIndexConstant(rewriter, loc, offset)); if (type.getRank() == 0) // No size/stride descriptor in memref, return the descriptor value. return rewriter.replaceOp(op, {memRefDescriptor}); // Fields 4 and 5: Sizes and strides of the strided MemRef. // Store all sizes in the descriptor. Only dynamic sizes are passed in as // operands to AllocOp. Value runningStride = nullptr; // Iterate strides in reverse order, compute runningStride and strideValues. auto nStrides = strides.size(); SmallVector strideValues(nStrides, nullptr); for (unsigned i = 0; i < nStrides; ++i) { int64_t index = nStrides - 1 - i; if (strides[index] == MemRefType::getDynamicStrideOrOffset()) // Identity layout map is enforced in the match function, so we compute: // `runningStride *= sizes[index + 1]` - runningStride = - runningStride - ? rewriter.create(loc, runningStride, - sizes[index + 1]) - : createIndexConstant(rewriter, loc, 1); + runningStride = runningStride + ? rewriter.create(loc, runningStride, + sizes[index + 1]) + : createIndexConstant(rewriter, loc, 1); else runningStride = createIndexConstant(rewriter, loc, strides[index]); strideValues[index] = runningStride; } // Fill size and stride descriptors in memref. 
for (auto indexedSize : llvm::enumerate(sizes)) { int64_t index = indexedSize.index(); memRefDescriptor.setSize(rewriter, loc, index, indexedSize.value()); memRefDescriptor.setStride(rewriter, loc, index, strideValues[index]); } // Return the final value of the descriptor. rewriter.replaceOp(op, {memRefDescriptor}); } bool useAlloca; }; // A CallOp automatically promotes MemRefType to a sequence of alloca/store and // passes the pointer to the MemRef across function boundaries. template struct CallOpInterfaceLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; using Super = CallOpInterfaceLowering; using Base = LLVMLegalizationPattern; PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { OperandAdaptor transformed(operands); auto callOp = cast(op); // Pack the result types into a struct. Type packedResult; unsigned numResults = callOp.getNumResults(); auto resultTypes = llvm::to_vector<4>(callOp.getResultTypes()); for (Type resType : resultTypes) { assert(!resType.isa() && "Returning unranked memref is not supported. Pass result as an" "argument instead."); (void)resType; } if (numResults != 0) { if (!(packedResult = this->typeConverter.packFunctionResults(resultTypes))) return this->matchFailure(); } auto promoted = this->typeConverter.promoteMemRefDescriptors( op->getLoc(), /*opOperands=*/op->getOperands(), operands, rewriter); auto newOp = rewriter.create(op->getLoc(), packedResult, promoted, op->getAttrs()); // If < 2 results, packing did not do anything and we can just return. if (numResults < 2) { rewriter.replaceOp(op, newOp.getResults()); return this->matchSuccess(); } // Otherwise, it had been converted to an operation producing a structure. // Extract individual results from the structure and return them as list. 
// TODO(aminim, ntv, riverriddle, zinenko): this seems like patching around // a particular interaction between MemRefType and CallOp lowering. Find a // way to avoid special casing. SmallVector results; results.reserve(numResults); for (unsigned i = 0; i < numResults; ++i) { auto type = this->typeConverter.convertType(op->getResult(i).getType()); results.push_back(rewriter.create( op->getLoc(), type, newOp.getOperation()->getResult(0), rewriter.getI64ArrayAttr(i))); } rewriter.replaceOp(op, results); return this->matchSuccess(); } }; struct CallOpLowering : public CallOpInterfaceLowering { using Super::Super; }; struct CallIndirectOpLowering : public CallOpInterfaceLowering { using Super::Super; }; // A `dealloc` is converted into a call to `free` on the underlying data buffer. // The memref descriptor being an SSA value, there is no need to clean it up // in any way. struct DeallocOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; DeallocOpLowering(LLVM::LLVMDialect &dialect_, LLVMTypeConverter &converter, bool useAlloca = false) : LLVMLegalizationPattern(dialect_, converter), useAlloca(useAlloca) {} PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { if (useAlloca) return rewriter.eraseOp(op), matchSuccess(); assert(operands.size() == 1 && "dealloc takes one operand"); OperandAdaptor transformed(operands); // Insert the `free` declaration if it is not already present. 
auto freeFunc = op->getParentOfType().lookupSymbol("free"); if (!freeFunc) { OpBuilder moduleBuilder(op->getParentOfType().getBodyRegion()); freeFunc = moduleBuilder.create( rewriter.getUnknownLoc(), "free", LLVM::LLVMType::getFunctionTy(getVoidType(), getVoidPtrType(), /*isVarArg=*/false)); } MemRefDescriptor memref(transformed.memref()); Value casted = rewriter.create( op->getLoc(), getVoidPtrType(), memref.allocatedPtr(rewriter, op->getLoc())); rewriter.replaceOpWithNewOp( op, ArrayRef(), rewriter.getSymbolRefAttr(freeFunc), casted); return matchSuccess(); } bool useAlloca; }; // A `tanh` is converted into a call to the `tanh` function. struct TanhOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { using LLVMFuncOpT = LLVM::LLVMFuncOp; using LLVMTypeT = LLVM::LLVMType; OperandAdaptor transformed(operands); LLVMTypeT operandType = transformed.operand().getType().dyn_cast_or_null(); if (!operandType) return matchFailure(); std::string functionName; if (operandType.isFloatTy()) functionName = "tanhf"; else if (operandType.isDoubleTy()) functionName = "tanh"; else return matchFailure(); // Get a reference to the tanh function, inserting it if necessary. 
Operation *tanhFunc = SymbolTable::lookupNearestSymbolFrom(op, functionName); LLVMFuncOpT tanhLLVMFunc; if (tanhFunc) { tanhLLVMFunc = cast(tanhFunc); } else { PatternRewriter::InsertionGuard insertGuard(rewriter); auto module = op->getParentOfType(); rewriter.setInsertionPointToStart(module.getBody()); tanhLLVMFunc = rewriter.create( module.getLoc(), functionName, LLVMTypeT::getFunctionTy(operandType, operandType, /*isVarArg=*/false)); } rewriter.replaceOpWithNewOp( op, operandType, rewriter.getSymbolRefAttr(tanhLLVMFunc), transformed.operand()); return matchSuccess(); } }; struct MemRefCastOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; PatternMatchResult match(Operation *op) const override { auto memRefCastOp = cast(op); Type srcType = memRefCastOp.getOperand().getType(); Type dstType = memRefCastOp.getType(); if (srcType.isa() && dstType.isa()) { MemRefType sourceType = memRefCastOp.getOperand().getType().cast(); MemRefType targetType = memRefCastOp.getType().cast(); return (isSupportedMemRefType(targetType) && isSupportedMemRefType(sourceType)) ? matchSuccess() : matchFailure(); } // At least one of the operands is unranked type assert(srcType.isa() || dstType.isa()); // Unranked to unranked cast is disallowed return !(srcType.isa() && dstType.isa()) ? matchSuccess() : matchFailure(); } void rewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto memRefCastOp = cast(op); OperandAdaptor transformed(operands); auto srcType = memRefCastOp.getOperand().getType(); auto dstType = memRefCastOp.getType(); auto targetStructType = typeConverter.convertType(memRefCastOp.getType()); auto loc = op->getLoc(); if (srcType.isa() && dstType.isa()) { // memref_cast is defined for source and destination memref types with the // same element type, same mappings, same address space and same rank. // Therefore a simple bitcast suffices. If not it is undefined behavior. 
rewriter.replaceOpWithNewOp(op, targetStructType, transformed.source()); } else if (srcType.isa() && dstType.isa()) { // Casting ranked to unranked memref type // Set the rank in the destination from the memref type // Allocate space on the stack and copy the src memref descriptor // Set the ptr in the destination to the stack space auto srcMemRefType = srcType.cast(); int64_t rank = srcMemRefType.getRank(); // ptr = AllocaOp sizeof(MemRefDescriptor) auto ptr = typeConverter.promoteOneMemRefDescriptor( loc, transformed.source(), rewriter); // voidptr = BitCastOp srcType* to void* auto voidPtr = rewriter.create(loc, getVoidPtrType(), ptr) .getResult(); // rank = ConstantOp srcRank auto rankVal = rewriter.create( loc, typeConverter.convertType(rewriter.getIntegerType(64)), rewriter.getI64IntegerAttr(rank)); // undef = UndefOp UnrankedMemRefDescriptor memRefDesc = UnrankedMemRefDescriptor::undef(rewriter, loc, targetStructType); // d1 = InsertValueOp undef, rank, 0 memRefDesc.setRank(rewriter, loc, rankVal); // d2 = InsertValueOp d1, voidptr, 1 memRefDesc.setMemRefDescPtr(rewriter, loc, voidPtr); rewriter.replaceOp(op, (Value)memRefDesc); } else if (srcType.isa() && dstType.isa()) { // Casting from unranked type to ranked. // The operation is assumed to be doing a correct cast. If the destination // type mismatches the unranked the type, it is undefined behavior. 
UnrankedMemRefDescriptor memRefDesc(transformed.source()); // ptr = ExtractValueOp src, 1 auto ptr = memRefDesc.memRefDescPtr(rewriter, loc); // castPtr = BitCastOp i8* to structTy* auto castPtr = rewriter .create( loc, targetStructType.cast().getPointerTo(), ptr) .getResult(); // struct = LoadOp castPtr auto loadOp = rewriter.create(loc, castPtr); rewriter.replaceOp(op, loadOp.getResult()); } else { llvm_unreachable("Unsuppored unranked memref to unranked memref cast"); } } }; // A `dim` is converted to a constant for static sizes and to an access to the // size stored in the memref descriptor for dynamic sizes. struct DimOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto dimOp = cast(op); OperandAdaptor transformed(operands); MemRefType type = dimOp.getOperand().getType().cast(); auto shape = type.getShape(); int64_t index = dimOp.getIndex(); // Extract dynamic size from the memref descriptor. if (ShapedType::isDynamic(shape[index])) rewriter.replaceOp(op, {MemRefDescriptor(transformed.memrefOrTensor()) .size(rewriter, op->getLoc(), index)}); else // Use constant for static size. rewriter.replaceOp( op, createIndexConstant(rewriter, op->getLoc(), shape[index])); return matchSuccess(); } }; // Common base for load and store operations on MemRefs. Restricts the match // to supported MemRef types. Provides functionality to emit code accessing a // specific element of the underlying data buffer. template struct LoadStoreOpLowering : public LLVMLegalizationPattern { using LLVMLegalizationPattern::LLVMLegalizationPattern; using Base = LoadStoreOpLowering; PatternMatchResult match(Operation *op) const override { MemRefType type = cast(op).getMemRefType(); return isSupportedMemRefType(type) ? 
this->matchSuccess() : this->matchFailure(); } // Given subscript indices and array sizes in row-major order, // i_n, i_{n-1}, ..., i_1 // s_n, s_{n-1}, ..., s_1 // obtain a value that corresponds to the linearized subscript // \sum_k i_k * \prod_{j=1}^{k-1} s_j // by accumulating the running linearized value. // Note that `indices` and `allocSizes` are passed in the same order as they // appear in load/store operations and memref type declarations. Value linearizeSubscripts(ConversionPatternRewriter &builder, Location loc, ArrayRef indices, ArrayRef allocSizes) const { assert(indices.size() == allocSizes.size() && "mismatching number of indices and allocation sizes"); assert(!indices.empty() && "cannot linearize a 0-dimensional access"); Value linearized = indices.front(); for (int i = 1, nSizes = allocSizes.size(); i < nSizes; ++i) { linearized = builder.create( loc, this->getIndexType(), ArrayRef{linearized, allocSizes[i]}); linearized = builder.create( loc, this->getIndexType(), ArrayRef{linearized, indices[i]}); } return linearized; } // This is a strided getElementPtr variant that linearizes subscripts as: // `base_offset + index_0 * stride_0 + ... + index_n * stride_n`. Value getStridedElementPtr(Location loc, Type elementTypePtr, Value descriptor, ArrayRef indices, ArrayRef strides, int64_t offset, ConversionPatternRewriter &rewriter) const { MemRefDescriptor memRefDescriptor(descriptor); Value base = memRefDescriptor.alignedPtr(rewriter, loc); Value offsetValue = offset == MemRefType::getDynamicStrideOrOffset() ? memRefDescriptor.offset(rewriter, loc) : this->createIndexConstant(rewriter, loc, offset); for (int i = 0, e = indices.size(); i < e; ++i) { Value stride = strides[i] == MemRefType::getDynamicStrideOrOffset() ? 
memRefDescriptor.stride(rewriter, loc, i) : this->createIndexConstant(rewriter, loc, strides[i]); Value additionalOffset = rewriter.create(loc, indices[i], stride); offsetValue = rewriter.create(loc, offsetValue, additionalOffset); } return rewriter.create(loc, elementTypePtr, base, offsetValue); } Value getDataPtr(Location loc, MemRefType type, Value memRefDesc, ArrayRef indices, ConversionPatternRewriter &rewriter, llvm::Module &module) const { LLVM::LLVMType ptrType = MemRefDescriptor(memRefDesc).getElementType(); int64_t offset; SmallVector strides; auto successStrides = getStridesAndOffset(type, strides, offset); assert(succeeded(successStrides) && "unexpected non-strided memref"); (void)successStrides; return getStridedElementPtr(loc, ptrType, memRefDesc, indices, strides, offset, rewriter); } }; // Load operation is lowered to obtaining a pointer to the indexed element // and loading it. struct LoadOpLowering : public LoadStoreOpLowering { using Base::Base; PatternMatchResult matchAndRewrite(Operation *op, ArrayRef operands, ConversionPatternRewriter &rewriter) const override { auto loadOp = cast(op); OperandAdaptor transformed(operands); auto type = loadOp.getMemRefType(); Value dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(), transformed.indices(), rewriter, getModule()); rewriter.replaceOpWithNewOp(op, dataPtr); return matchSuccess(); } }; // Store operation is lowered to obtaining a pointer to the indexed element, // and storing the given value to it. 
struct StoreOpLowering : public LoadStoreOpLowering {
  using Base::Base;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    // The memref type comes from the original op; the values come from the
    // converted operand list.
    auto type = cast(op).getMemRefType();
    OperandAdaptor transformed(operands);

    Value dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(),
                               transformed.indices(), rewriter, getModule());
    rewriter.replaceOpWithNewOp(op, transformed.value(), dataPtr);
    return matchSuccess();
  }
};

// The prefetch operation is lowered in a way similar to the load operation
// except that the llvm.prefetch operation is used for replacement.
struct PrefetchOpLowering : public LoadStoreOpLowering {
  using Base::Base;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto prefetchOp = cast(op);
    OperandAdaptor transformed(operands);
    auto type = prefetchOp.getMemRefType();

    Value dataPtr = getDataPtr(op->getLoc(), type, transformed.memref(),
                               transformed.indices(), rewriter, getModule());

    // Replace with llvm.prefetch.
    // The three prefetch attributes (read/write, locality hint, data/
    // instruction cache) are each materialized as a 32-bit integer value.
    auto llvmI32Type = typeConverter.convertType(rewriter.getIntegerType(32));
    auto isWrite = rewriter.create(
        op->getLoc(), llvmI32Type,
        rewriter.getI32IntegerAttr(prefetchOp.isWrite()));
    auto localityHint = rewriter.create(
        op->getLoc(), llvmI32Type,
        rewriter.getI32IntegerAttr(prefetchOp.localityHint().getZExtValue()));
    auto isData = rewriter.create(
        op->getLoc(), llvmI32Type,
        rewriter.getI32IntegerAttr(prefetchOp.isDataCache()));

    rewriter.replaceOpWithNewOp(op, dataPtr, isWrite,
                                localityHint, isData);
    return matchSuccess();
  }
};

// The lowering of index_cast becomes an integer conversion since index becomes
// an integer. If the bit width of the source and target integer types is the
// same, just erase the cast. If the target type is wider, sign-extend the
// value, otherwise truncate it.
struct IndexCastOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    IndexCastOpOperandAdaptor transformed(operands);
    auto indexCastOp = cast(op);

    // The target width comes from the converted result type; the source width
    // comes from the already-converted operand.
    auto targetType =
        this->typeConverter.convertType(indexCastOp.getResult().getType())
            .cast();
    auto sourceType = transformed.in().getType().cast();
    unsigned targetBits = targetType.getUnderlyingType()->getIntegerBitWidth();
    unsigned sourceBits = sourceType.getUnderlyingType()->getIntegerBitWidth();

    // Same width: forward the operand. Narrower target: first replacement
    // below. Wider target: second replacement below.
    if (targetBits == sourceBits)
      rewriter.replaceOp(op, transformed.in());
    else if (targetBits < sourceBits)
      rewriter.replaceOpWithNewOp(op, targetType,
                                  transformed.in());
    else
      rewriter.replaceOpWithNewOp(op, targetType,
                                  transformed.in());
    return matchSuccess();
  }
};

// Convert std.cmp predicate into the LLVM dialect CmpPredicate. The two
// enums share the numerical values so just cast.
template
static LLVMPredType convertCmpPredicate(StdPredType pred) {
  return static_cast(pred);
}

// Lowers the integer comparison op: the predicate is converted with
// convertCmpPredicate and attached as a 64-bit integer attribute; the result
// type is the converted type of the original result.
struct CmpIOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto cmpiOp = cast(op);
    CmpIOpOperandAdaptor transformed(operands);

    rewriter.replaceOpWithNewOp(
        op, typeConverter.convertType(cmpiOp.getResult().getType()),
        rewriter.getI64IntegerAttr(static_cast(
            convertCmpPredicate(cmpiOp.getPredicate()))),
        transformed.lhs(), transformed.rhs());

    return matchSuccess();
  }
};

// Lowers the floating-point comparison op; same structure as CmpIOpLowering.
struct CmpFOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto cmpfOp = cast(op);
    CmpFOpOperandAdaptor transformed(operands);

    rewriter.replaceOpWithNewOp(
        op, typeConverter.convertType(cmpfOp.getResult().getType()),
        rewriter.getI64IntegerAttr(static_cast(
            convertCmpPredicate(cmpfOp.getPredicate()))),
        transformed.lhs(), transformed.rhs());

    return matchSuccess();
  }
};

// One-to-one lowerings: each struct only inherits the constructors of its
// OneToOneLLVMOpLowering base.
struct SIToFPLowering
    : public OneToOneLLVMOpLowering {
  using Super::Super;
};

struct FPExtLowering
    : public OneToOneLLVMOpLowering {
  using Super::Super;
};

struct FPTruncLowering
    : public OneToOneLLVMOpLowering {
  using Super::Super;
};

struct SignExtendIOpLowering
    : public OneToOneLLVMOpLowering {
  using Super::Super;
};

struct TruncateIOpLowering
    : public OneToOneLLVMOpLowering {
  using Super::Super;
};

struct ZeroExtendIOpLowering
    : public OneToOneLLVMOpLowering {
  using Super::Super;
};

// Base class for LLVM IR lowering terminator operations with successors.
template
struct OneToOneLLVMTerminatorLowering
    : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;
  using Super = OneToOneLLVMTerminatorLowering;

  // Replaces the terminator with the target op, forwarding the proper
  // operands, the successor blocks, the per-successor operands and the
  // original attributes unchanged.
  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef properOperands,
                  ArrayRef destinations,
                  ArrayRef> operands,
                  ConversionPatternRewriter &rewriter) const override {
    SmallVector operandRanges(operands.begin(), operands.end());
    rewriter.replaceOpWithNewOp(op, properOperands, destinations,
                                operandRanges, op->getAttrs());
    return this->matchSuccess();
  }
};

// Special lowering pattern for `ReturnOps`. Unlike all other operations,
// `ReturnOp` interacts with the function signature and must have as many
// operands as the function has return values. Because in LLVM IR, functions
// can only return 0 or 1 value, we pack multiple values into a structure type.
// Emit `UndefOp` followed by `InsertValueOp`s to create such structure if
// necessary before returning it.
struct ReturnOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    unsigned numArguments = op->getNumOperands();

    // If ReturnOp has 0 or 1 operand, create it and return immediately.
    if (numArguments == 0) {
      rewriter.replaceOpWithNewOp(
          op, ArrayRef(), ArrayRef(), op->getAttrs());
      return matchSuccess();
    }
    if (numArguments == 1) {
      rewriter.replaceOpWithNewOp(
          op, ArrayRef(operands.front()), ArrayRef(),
          op->getAttrs());
      return matchSuccess();
    }

    // Otherwise, we need to pack the arguments into an LLVM struct type before
    // returning.
    // NOTE(review): packFunctionResults can return a null type when an operand
    // type fails to convert; the result is used here without a check.
    auto packedType = typeConverter.packFunctionResults(
        llvm::to_vector<4>(op->getOperandTypes()));

    // Start from an undefined struct and insert each converted operand at
    // position i.
    Value packed = rewriter.create(op->getLoc(), packedType);
    for (unsigned i = 0; i < numArguments; ++i) {
      packed = rewriter.create(
          op->getLoc(), packedType, packed, operands[i],
          rewriter.getI64ArrayAttr(i));
    }
    rewriter.replaceOpWithNewOp(
        op, llvm::makeArrayRef(packed), ArrayRef(), op->getAttrs());
    return matchSuccess();
  }
};

// FIXME: this should be tablegen'ed as well.
struct BranchOpLowering
    : public OneToOneLLVMTerminatorLowering {
  using Super::Super;
};
struct CondBranchOpLowering
    : public OneToOneLLVMTerminatorLowering {
  using Super::Super;
};

// The Splat operation is lowered to an insertelement + a shufflevector
// operation. Splat to only 1-d vector result types are lowered.
struct SplatOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto splatOp = cast(op);
    // Anything that is not a rank-1 vector result is left to other patterns.
    VectorType resultType = splatOp.getType().dyn_cast();
    if (!resultType || resultType.getRank() != 1)
      return matchFailure();

    // First insert it into an undef vector so we can shuffle it.
    auto vectorType = typeConverter.convertType(splatOp.getType());
    Value undef = rewriter.create(op->getLoc(), vectorType);
    auto zero = rewriter.create(
        op->getLoc(), typeConverter.convertType(rewriter.getIntegerType(32)),
        rewriter.getZeroAttr(rewriter.getIntegerType(32)));

    // NOTE(review): this reads the operand from the original op, whereas
    // SplatNdOpLowering reads it from the converted `operands` through an
    // adaptor -- confirm this difference is intended.
    auto v = rewriter.create(
        op->getLoc(), vectorType, undef, splatOp.getOperand(), zero);

    int64_t width = splatOp.getType().cast().getDimSize(0);
    // An all-zero mask selects element 0 for every lane.
    SmallVector zeroValues(width, 0);

    // Shuffle the value across the desired number of elements.
    ArrayAttr zeroAttrs = rewriter.getI32ArrayAttr(zeroValues);
    rewriter.replaceOpWithNewOp(op, v, undef, zeroAttrs);
    return matchSuccess();
  }
};

// The Splat operation is lowered to an insertelement + a shufflevector
// operation. Splat to only 2+-d vector result types are lowered by the
// SplatNdOpLowering, the 1-d case is handled by SplatOpLowering.
struct SplatNdOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto splatOp = cast(op);
    OperandAdaptor adaptor(operands);
    // Rank-1 vectors are rejected here; non-vector results are rejected too.
    VectorType resultType = splatOp.getType().dyn_cast();
    if (!resultType || resultType.getRank() == 1)
      return matchFailure();

    // First insert it into an undef vector so we can shuffle it.
    auto loc = op->getLoc();
    auto vectorTypeInfo = extractNDVectorTypeInfo(resultType, typeConverter);
    auto llvmArrayTy = vectorTypeInfo.llvmArrayTy;
    auto llvmVectorTy = vectorTypeInfo.llvmVectorTy;
    if (!llvmArrayTy || !llvmVectorTy)
      return matchFailure();

    // Construct returned value.
    Value desc = rewriter.create(loc, llvmArrayTy);

    // Construct a 1-D vector with the splatted value that we insert in all the
    // places within the returned descriptor.
    Value vdesc = rewriter.create(loc, llvmVectorTy);
    auto zero = rewriter.create(
        loc, typeConverter.convertType(rewriter.getIntegerType(32)),
        rewriter.getZeroAttr(rewriter.getIntegerType(32)));
    Value v = rewriter.create(loc, llvmVectorTy, vdesc,
                              adaptor.input(), zero);

    // Shuffle the value across the desired number of elements.
    // The width is the size of the innermost (last) dimension.
    int64_t width = resultType.getDimSize(resultType.getRank() - 1);
    SmallVector zeroValues(width, 0);
    ArrayAttr zeroAttrs = rewriter.getI32ArrayAttr(zeroValues);
    v = rewriter.create(loc, v, v, zeroAttrs);

    // Iterate of linear index, convert to coords space and insert splatted 1-D
    // vector in each position.
    nDVectorIterate(vectorTypeInfo, rewriter, [&](ArrayAttr position) {
      desc = rewriter.create(loc, llvmArrayTy, desc, v,
                             position);
    });
    rewriter.replaceOp(op, desc);
    return matchSuccess();
  }
};

/// Conversion pattern that transforms a subview op into:
///   1.
///      An `llvm.mlir.undef` operation to create a memref descriptor
///   2. Updates to the descriptor to introduce the data ptr, offset, size
///      and stride.
/// The subview op is replaced by the descriptor.
struct SubViewOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    auto viewOp = cast(op);
    // TODO(b/144779634, ravishankarm) : After Tblgen is adapted to support
    // having multiple variadic operands where each operand can have different
    // number of entries, clean all of this up.
    // Operand layout: [source, offsets..., sizes..., strides...].
    SmallVector dynamicOffsets(
        std::next(operands.begin()),
        std::next(operands.begin(), 1 + viewOp.getNumOffsets()));
    SmallVector dynamicSizes(
        std::next(operands.begin(), 1 + viewOp.getNumOffsets()),
        std::next(operands.begin(),
                  1 + viewOp.getNumOffsets() + viewOp.getNumSizes()));
    SmallVector dynamicStrides(
        std::next(operands.begin(),
                  1 + viewOp.getNumOffsets() + viewOp.getNumSizes()),
        operands.end());

    auto sourceMemRefType = viewOp.source().getType().cast();
    auto sourceElementTy =
        typeConverter.convertType(sourceMemRefType.getElementType())
            .dyn_cast_or_null();

    auto viewMemRefType = viewOp.getType();
    // NOTE(review): unlike the two neighbouring conversions, this one is not
    // null-tolerant and `targetElementTy` is not checked below -- confirm.
    auto targetElementTy =
        typeConverter.convertType(viewMemRefType.getElementType())
            .dyn_cast();
    auto targetDescTy = typeConverter.convertType(viewMemRefType)
                            .dyn_cast_or_null();
    if (!sourceElementTy || !targetDescTy)
      return matchFailure();

    // Currently, only rank > 0 and full or no operands are supported. Fail to
    // convert otherwise.
    unsigned rank = sourceMemRefType.getRank();
    if (viewMemRefType.getRank() == 0 ||
        (!dynamicOffsets.empty() && rank != dynamicOffsets.size()) ||
        (!dynamicSizes.empty() && rank != dynamicSizes.size()) ||
        (!dynamicStrides.empty() && rank != dynamicStrides.size()))
      return matchFailure();

    int64_t offset;
    SmallVector strides;
    auto successStrides = getStridesAndOffset(viewMemRefType, strides, offset);
    if (failed(successStrides))
      return matchFailure();

    // Fail to convert if neither a dynamic nor static offset is available.
    if (dynamicOffsets.empty() &&
        offset == MemRefType::getDynamicStrideOrOffset())
      return matchFailure();

    // Create the descriptor.
    if (!operands.front().getType().isa())
      return matchFailure();
    MemRefDescriptor sourceMemRef(operands.front());
    auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);

    // Copy the buffer pointer from the old descriptor to the new one.
    Value extracted = sourceMemRef.allocatedPtr(rewriter, loc);
    Value bitcastPtr = rewriter.create(
        loc, targetElementTy.getPointerTo(), extracted);
    targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr);

    extracted = sourceMemRef.alignedPtr(rewriter, loc);
    bitcastPtr = rewriter.create(
        loc, targetElementTy.getPointerTo(), extracted);
    targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);

    // Extract strides needed to compute offset.
    // These are the strides of the *source* descriptor.
    SmallVector strideValues;
    strideValues.reserve(viewMemRefType.getRank());
    for (int i = 0, e = viewMemRefType.getRank(); i < e; ++i)
      strideValues.push_back(sourceMemRef.stride(rewriter, loc, i));

    // Fill in missing dynamic sizes.
    // With no size operands, every extent of the result shape is emitted as
    // a constant of the converted index type.
    auto llvmIndexType = typeConverter.convertType(rewriter.getIndexType());
    if (dynamicSizes.empty()) {
      dynamicSizes.reserve(viewMemRefType.getRank());
      auto shape = viewMemRefType.getShape();
      for (auto extent : shape) {
        dynamicSizes.push_back(rewriter.create(
            loc, llvmIndexType, rewriter.getI64IntegerAttr(extent)));
      }
    }

    // Offset.
    if (dynamicOffsets.empty()) {
      targetMemRef.setConstantOffset(rewriter, loc, offset);
    } else {
      // Start from the source offset and add one contribution per dimension
      // built from dynamicOffsets[i] and the source stride.
      Value baseOffset = sourceMemRef.offset(rewriter, loc);
      for (int i = 0, e = viewMemRefType.getRank(); i < e; ++i) {
        Value min = dynamicOffsets[i];
        baseOffset = rewriter.create(
            loc, baseOffset,
            rewriter.create(loc, min, strideValues[i]));
      }
      targetMemRef.setOffset(rewriter, loc, baseOffset);
    }

    // Update sizes and strides.
    for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) {
      targetMemRef.setSize(rewriter, loc, i, dynamicSizes[i]);
      Value newStride;
      // Without stride operands the static stride of the result type is used;
      // otherwise the stride operand is combined with the source stride.
      if (dynamicStrides.empty())
        newStride = rewriter.create(
            loc, llvmIndexType, rewriter.getI64IntegerAttr(strides[i]));
      else
        newStride = rewriter.create(loc, dynamicStrides[i],
                                    strideValues[i]);
      targetMemRef.setStride(rewriter, loc, i, newStride);
    }

    rewriter.replaceOp(op, {targetMemRef});
    return matchSuccess();
  }
};

/// Conversion pattern that transforms a view op into:
///   1. An `llvm.mlir.undef` operation to create a memref descriptor
///   2. Updates to the descriptor to introduce the data ptr, offset, size
///      and stride.
/// The view op is replaced by the descriptor.
struct ViewOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  // Build and return the value for the idx^th shape dimension, either by
  // returning the constant shape dimension or counting the proper dynamic size.
  Value getSize(ConversionPatternRewriter &rewriter, Location loc,
                ArrayRef shape, ArrayRef dynamicSizes,
                unsigned idx) const {
    assert(idx < shape.size());
    if (!ShapedType::isDynamic(shape[idx]))
      return createIndexConstant(rewriter, loc, shape[idx]);
    // Count the number of dynamic dims in range [0, idx); that count is the
    // position of dimension `idx` within `dynamicSizes`.
    unsigned nDynamic = llvm::count_if(shape.take_front(idx), [](int64_t v) {
      return ShapedType::isDynamic(v);
    });
    return dynamicSizes[nDynamic];
  }

  // Build and return the idx^th stride, either by returning the constant stride
  // or by computing the dynamic stride from the current `runningStride` and
  // `nextSize`. The caller should keep a running stride and update it with the
  // result returned by this function.
  Value getStride(ConversionPatternRewriter &rewriter, Location loc,
                  ArrayRef strides, Value nextSize,
                  Value runningStride, unsigned idx) const {
    assert(idx < strides.size());
    if (strides[idx] != MemRefType::getDynamicStrideOrOffset())
      return createIndexConstant(rewriter, loc, strides[idx]);
    if (nextSize)
      return runningStride
                 ? rewriter.create(loc, runningStride, nextSize)
                 : nextSize;
    // No size seen yet: this must be the first call, so the stride is 1.
    assert(!runningStride);
    return createIndexConstant(rewriter, loc, 1);
  }

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    auto loc = op->getLoc();
    auto viewOp = cast(op);
    ViewOpOperandAdaptor adaptor(operands);

    auto viewMemRefType = viewOp.getType();
    auto targetElementTy =
        typeConverter.convertType(viewMemRefType.getElementType())
            .dyn_cast();
    auto targetDescTy =
        typeConverter.convertType(viewMemRefType).dyn_cast();
    if (!targetDescTy)
      return op->emitWarning("Target descriptor type not converted to LLVM"),
             matchFailure();

    int64_t offset;
    SmallVector strides;
    auto successStrides = getStridesAndOffset(viewMemRefType, strides, offset);
    if (failed(successStrides))
      return op->emitWarning("cannot cast to non-strided shape"),
             matchFailure();

    // Create the descriptor.
    MemRefDescriptor sourceMemRef(adaptor.source());
    auto targetMemRef = MemRefDescriptor::undef(rewriter, loc, targetDescTy);

    // Field 1: Copy the allocated pointer, used for malloc/free.
    Value extracted = sourceMemRef.allocatedPtr(rewriter, loc);
    Value bitcastPtr = rewriter.create(
        loc, targetElementTy.getPointerTo(), extracted);
    targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr);

    // Field 2: Copy the actual aligned pointer to payload.
    extracted = sourceMemRef.alignedPtr(rewriter, loc);
    bitcastPtr = rewriter.create(
        loc, targetElementTy.getPointerTo(), extracted);
    targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr);

    // Field 3: Copy the offset in aligned pointer.
    unsigned numDynamicSizes = llvm::size(viewOp.getDynamicSizes());
    // Only read by the assert below.
    (void)numDynamicSizes;
    bool hasDynamicOffset = offset == MemRefType::getDynamicStrideOrOffset();
    auto sizeAndOffsetOperands = adaptor.operands();
    assert(llvm::size(sizeAndOffsetOperands) ==
           numDynamicSizes + (hasDynamicOffset ? 1 : 0));
    // A dynamic offset, when present, is the first of these operands.
    Value baseOffset = !hasDynamicOffset
                           ? createIndexConstant(rewriter, loc, offset)
                           // TODO(ntv): better adaptor.
                           : sizeAndOffsetOperands.front();
    targetMemRef.setOffset(rewriter, loc, baseOffset);

    // Early exit for 0-D corner case.
    if (viewMemRefType.getRank() == 0)
      return rewriter.replaceOp(op, {targetMemRef}), matchSuccess();

    // Fields 4 and 5: Update sizes and strides.
    if (strides.back() != 1)
      return op->emitWarning("cannot cast to non-contiguous shape"),
             matchFailure();
    Value stride = nullptr, nextSize = nullptr;
    // Drop the dynamic offset from the operand list, if present.
    ArrayRef sizeOperands(sizeAndOffsetOperands);
    if (hasDynamicOffset)
      sizeOperands = sizeOperands.drop_front();
    // Walk from the innermost dimension outwards so that each stride can be
    // derived from the previous stride and the previous size.
    for (int i = viewMemRefType.getRank() - 1; i >= 0; --i) {
      // Update size.
      Value size =
          getSize(rewriter, loc, viewMemRefType.getShape(), sizeOperands, i);
      targetMemRef.setSize(rewriter, loc, i, size);
      // Update stride.
      stride = getStride(rewriter, loc, strides, nextSize, stride, i);
      targetMemRef.setStride(rewriter, loc, i, stride);
      nextSize = size;
    }

    rewriter.replaceOp(op, {targetMemRef});
    return matchSuccess();
  }
};

// Lowers the assume-alignment op into an assumption on the aligned pointer of
// the memref descriptor; the op itself is erased.
struct AssumeAlignmentOpLowering : public LLVMLegalizationPattern {
  using LLVMLegalizationPattern::LLVMLegalizationPattern;

  PatternMatchResult
  matchAndRewrite(Operation *op, ArrayRef operands,
                  ConversionPatternRewriter &rewriter) const override {
    OperandAdaptor transformed(operands);
    Value memref = transformed.memref();
    unsigned alignment = cast(op).alignment().getZExtValue();

    MemRefDescriptor memRefDescriptor(memref);
    // Note: this extraction uses the memref value's location, not the op's.
    Value ptr = memRefDescriptor.alignedPtr(rewriter, memref.getLoc());

    // Emit llvm.assume(memref.alignedPtr & (alignment - 1) == 0). Notice that
    // the asserted memref.alignedPtr isn't used anywhere else, as the real
    // users like load/store/views always re-extract memref.alignedPtr as they
    // get lowered.
    //
    // This relies on LLVM's CSE optimization (potentially after SROA), since
    // after CSE all memref.alignedPtr instances get de-duplicated into the same
    // pointer SSA value.
    Value zero =
        createIndexAttrConstant(rewriter, op->getLoc(), getIndexType(), 0);
    // NOTE(review): `alignment - 1` is unsigned arithmetic; presumably the op
    // guarantees a non-zero alignment -- confirm.
    Value mask = createIndexAttrConstant(rewriter, op->getLoc(), getIndexType(),
                                         alignment - 1);
    Value ptrValue =
        rewriter.create(op->getLoc(), getIndexType(), ptr);
    rewriter.create(
        op->getLoc(),
        rewriter.create(
            op->getLoc(), LLVM::ICmpPredicate::eq,
            rewriter.create(op->getLoc(), ptrValue, mask), zero));

    rewriter.eraseOp(op);
    return matchSuccess();
  }
};

} // namespace

+/// Try to match the kind of a std.atomic_rmw to determine whether to use a
+/// lowering to llvm.atomicrmw or fallback to llvm.cmpxchg.
+/// Returns llvm::None for every kind that is not listed in the switch below.
+static Optional matchSimpleAtomicOp(AtomicRMWOp atomicOp) {
+  switch (atomicOp.kind()) {
+  case AtomicRMWKind::addf:
+    return LLVM::AtomicBinOp::fadd;
+  case AtomicRMWKind::addi:
+    return LLVM::AtomicBinOp::add;
+  case AtomicRMWKind::assign:
+    return LLVM::AtomicBinOp::xchg;
+  case AtomicRMWKind::maxs:
+    return LLVM::AtomicBinOp::max;
+  case AtomicRMWKind::maxu:
+    return LLVM::AtomicBinOp::umax;
+  case AtomicRMWKind::mins:
+    return LLVM::AtomicBinOp::min;
+  case AtomicRMWKind::minu:
+    return LLVM::AtomicBinOp::umin;
+  default:
+    return llvm::None;
+  }
+  // Every path through the switch returns, so this is never reached.
+  llvm_unreachable("Invalid AtomicRMWKind");
+}
+
+namespace {
+
+/// Lowers a std.atomic_rmw whose kind maps directly onto an
+/// LLVM::AtomicBinOp (see matchSimpleAtomicOp). All other kinds are rejected
+/// so that another pattern can handle them.
+struct AtomicRMWOpLowering : public LoadStoreOpLowering {
+  using Base::Base;
+
+  PatternMatchResult
+  matchAndRewrite(Operation *op, ArrayRef operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto atomicOp = cast(op);
+    auto maybeKind = matchSimpleAtomicOp(atomicOp);
+    if (!maybeKind)
+      return matchFailure();
+    OperandAdaptor adaptor(operands);
+    // The result has the same (converted) type as the value operand.
+    auto resultType = adaptor.value().getType();
+    auto memRefType = atomicOp.getMemRefType();
+    auto dataPtr = getDataPtr(op->getLoc(), memRefType, adaptor.memref(),
+                              adaptor.indices(), rewriter, getModule());
+    rewriter.replaceOpWithNewOp(
+        op, resultType, *maybeKind, dataPtr, adaptor.value(),
+        LLVM::AtomicOrdering::acq_rel);
+    return matchSuccess();
+  }
+};
+
+/// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be
+/// retried until it succeeds in atomically storing a new value into memory.
+/// +/// +---------------------------------+ +/// | | +/// | | +/// | br loop(%loaded) | +/// +---------------------------------+ +/// | +/// -------| | +/// | v v +/// | +--------------------------------+ +/// | | loop(%loaded): | +/// | | | +/// | | %pair = cmpxchg | +/// | | %ok = %pair[0] | +/// | | %new = %pair[1] | +/// | | cond_br %ok, end, loop(%new) | +/// | +--------------------------------+ +/// | | | +/// |----------- | +/// v +/// +--------------------------------+ +/// | end: | +/// | | +/// +--------------------------------+ +/// +struct AtomicCmpXchgOpLowering : public LoadStoreOpLowering { + using Base::Base; + + PatternMatchResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + auto atomicOp = cast(op); + auto maybeKind = matchSimpleAtomicOp(atomicOp); + if (maybeKind) + return matchFailure(); + + LLVM::FCmpPredicate predicate; + switch (atomicOp.kind()) { + case AtomicRMWKind::maxf: + predicate = LLVM::FCmpPredicate::ogt; + break; + case AtomicRMWKind::minf: + predicate = LLVM::FCmpPredicate::olt; + break; + default: + return matchFailure(); + } + + OperandAdaptor adaptor(operands); + auto loc = op->getLoc(); + auto valueType = adaptor.value().getType().cast(); + + // Split the block into initial, loop, and ending parts. + auto *initBlock = rewriter.getInsertionBlock(); + auto initPosition = rewriter.getInsertionPoint(); + auto *loopBlock = rewriter.splitBlock(initBlock, initPosition); + auto loopArgument = loopBlock->addArgument(valueType); + auto loopPosition = rewriter.getInsertionPoint(); + auto *endBlock = rewriter.splitBlock(loopBlock, loopPosition); + + // Compute the loaded value and branch to the loop block. 
+ rewriter.setInsertionPointToEnd(initBlock); + auto memRefType = atomicOp.getMemRefType(); + auto dataPtr = getDataPtr(loc, memRefType, adaptor.memref(), + adaptor.indices(), rewriter, getModule()); + auto init = rewriter.create(loc, dataPtr); + std::array brRegionOperands{init}; + std::array brOperands{brRegionOperands}; + rewriter.create(loc, ArrayRef{}, loopBlock, brOperands); + + // Prepare the body of the loop block. + rewriter.setInsertionPointToStart(loopBlock); + auto predicateI64 = + rewriter.getI64IntegerAttr(static_cast(predicate)); + auto boolType = LLVM::LLVMType::getInt1Ty(&getDialect()); + auto lhs = loopArgument; + auto rhs = adaptor.value(); + auto cmp = + rewriter.create(loc, boolType, predicateI64, lhs, rhs); + auto select = rewriter.create(loc, cmp, lhs, rhs); + + // Prepare the epilog of the loop block. + rewriter.setInsertionPointToEnd(loopBlock); + // Append the cmpxchg op to the end of the loop block. + auto successOrdering = LLVM::AtomicOrdering::acq_rel; + auto failureOrdering = LLVM::AtomicOrdering::monotonic; + auto pairType = LLVM::LLVMType::getStructTy(valueType, boolType); + auto cmpxchg = rewriter.create( + loc, pairType, dataPtr, loopArgument, select, successOrdering, + failureOrdering); + // Extract the %new_loaded and %ok values from the pair. + auto newLoaded = rewriter.create( + loc, valueType, cmpxchg, rewriter.getI64ArrayAttr({0})); + auto ok = rewriter.create( + loc, boolType, cmpxchg, rewriter.getI64ArrayAttr({1})); + + // Conditionally branch to the end or back to the loop depending on %ok. + std::array condBrProperOperands{ok}; + std::array condBrDestinations{endBlock, loopBlock}; + std::array condBrRegionOperands{newLoaded}; + std::array condBrOperands{ArrayRef{}, + condBrRegionOperands}; + rewriter.create(loc, condBrProperOperands, + condBrDestinations, condBrOperands); + + // The 'result' of the atomic_rmw op is the newly loaded value. 
+ rewriter.replaceOp(op, {newLoaded}); + + return matchSuccess(); + } +}; + +} // namespace + static void ensureDistinctSuccessors(Block &bb) { auto *terminator = bb.getTerminator(); // Find repeated successors with arguments. llvm::SmallDenseMap> successorPositions; for (int i = 0, e = terminator->getNumSuccessors(); i < e; ++i) { Block *successor = terminator->getSuccessor(i); // Blocks with no arguments are safe even if they appear multiple times // because they don't need PHI nodes. if (successor->getNumArguments() == 0) continue; successorPositions[successor].push_back(i); } // If a successor appears for the second or more time in the terminator, // create a new dummy block that unconditionally branches to the original // destination, and retarget the terminator to branch to this new block. // There is no need to pass arguments to the dummy block because it will be // dominated by the original block and can therefore use any values defined in // the original block. for (const auto &successor : successorPositions) { const auto &positions = successor.second; // Start from the second occurrence of a block in the successor list. for (auto position = std::next(positions.begin()), end = positions.end(); position != end; ++position) { auto *dummyBlock = new Block(); bb.getParent()->push_back(dummyBlock); auto builder = OpBuilder(dummyBlock); SmallVector operands( terminator->getSuccessorOperands(*position)); builder.create(terminator->getLoc(), successor.first, operands); terminator->setSuccessor(dummyBlock, *position); for (int i = 0, e = terminator->getNumSuccessorOperands(*position); i < e; ++i) terminator->eraseSuccessorOperand(*position, i); } } } void mlir::LLVM::ensureDistinctSuccessors(ModuleOp m) { for (auto f : m.getOps()) { for (auto &bb : f.getBlocks()) { ::ensureDistinctSuccessors(bb); } } } /// Collect a set of patterns to convert from the Standard dialect to LLVM. 
void mlir::populateStdToLLVMNonMemoryConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { // FIXME: this should be tablegen'ed // clang-format off patterns.insert< AbsFOpLowering, AddFOpLowering, AddIOpLowering, AndOpLowering, + AtomicCmpXchgOpLowering, + AtomicRMWOpLowering, BranchOpLowering, CallIndirectOpLowering, CallOpLowering, CeilFOpLowering, CmpFOpLowering, CmpIOpLowering, CondBranchOpLowering, CopySignOpLowering, CosOpLowering, ConstLLVMOpLowering, DivFOpLowering, ExpOpLowering, LogOpLowering, Log10OpLowering, Log2OpLowering, FPExtLowering, FPTruncLowering, IndexCastOpLowering, MulFOpLowering, MulIOpLowering, NegFOpLowering, OrOpLowering, PrefetchOpLowering, RemFOpLowering, ReturnOpLowering, SIToFPLowering, SelectOpLowering, ShiftLeftOpLowering, SignExtendIOpLowering, SignedDivIOpLowering, SignedRemIOpLowering, SignedShiftRightOpLowering, SplatOpLowering, SplatNdOpLowering, SqrtOpLowering, SubFOpLowering, SubIOpLowering, TanhOpLowering, TruncateIOpLowering, UnsignedDivIOpLowering, UnsignedRemIOpLowering, UnsignedShiftRightOpLowering, XOrOpLowering, ZeroExtendIOpLowering>(*converter.getDialect(), converter); // clang-format on } void mlir::populateStdToLLVMMemoryConversionPatters( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, bool useAlloca) { // clang-format off patterns.insert< AssumeAlignmentOpLowering, DimOpLowering, LoadOpLowering, MemRefCastOpLowering, StoreOpLowering, SubViewOpLowering, ViewOpLowering>(*converter.getDialect(), converter); patterns.insert< AllocOpLowering, DeallocOpLowering>( *converter.getDialect(), converter, useAlloca); // clang-format on } void mlir::populateStdToLLVMDefaultFuncOpConversionPattern( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, bool emitCWrappers) { patterns.insert(*converter.getDialect(), converter, emitCWrappers); } void mlir::populateStdToLLVMConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, bool 
useAlloca, bool emitCWrappers) { populateStdToLLVMDefaultFuncOpConversionPattern(converter, patterns, emitCWrappers); populateStdToLLVMNonMemoryConversionPatterns(converter, patterns); populateStdToLLVMMemoryConversionPatters(converter, patterns, useAlloca); } static void populateStdToLLVMBarePtrFuncOpConversionPattern( LLVMTypeConverter &converter, OwningRewritePatternList &patterns) { patterns.insert(*converter.getDialect(), converter); } void mlir::populateStdToLLVMBarePtrConversionPatterns( LLVMTypeConverter &converter, OwningRewritePatternList &patterns, bool useAlloca) { populateStdToLLVMBarePtrFuncOpConversionPattern(converter, patterns); populateStdToLLVMNonMemoryConversionPatterns(converter, patterns); populateStdToLLVMMemoryConversionPatters(converter, patterns, useAlloca); } // Create an LLVM IR structure type if there is more than one result. Type LLVMTypeConverter::packFunctionResults(ArrayRef types) { assert(!types.empty() && "expected non-empty list of type"); if (types.size() == 1) return convertType(types.front()); SmallVector resultTypes; resultTypes.reserve(types.size()); for (auto t : types) { auto converted = convertType(t).dyn_cast(); if (!converted) return {}; resultTypes.push_back(converted); } return LLVM::LLVMType::getStructTy(llvmDialect, resultTypes); } Value LLVMTypeConverter::promoteOneMemRefDescriptor(Location loc, Value operand, OpBuilder &builder) { auto *context = builder.getContext(); auto int64Ty = LLVM::LLVMType::getInt64Ty(getDialect()); auto indexType = IndexType::get(context); // Alloca with proper alignment. We do not expect optimizations of this // alloca op and so we omit allocating at the entry block. auto ptrType = operand.getType().cast().getPointerTo(); Value one = builder.create(loc, int64Ty, IntegerAttr::get(indexType, 1)); Value allocated = builder.create(loc, ptrType, one, /*alignment=*/0); // Store into the alloca'ed descriptor. 
builder.create(loc, operand, allocated); return allocated; } SmallVector LLVMTypeConverter::promoteMemRefDescriptors(Location loc, ValueRange opOperands, ValueRange operands, OpBuilder &builder) { SmallVector promotedOperands; promotedOperands.reserve(operands.size()); for (auto it : llvm::zip(opOperands, operands)) { auto operand = std::get<0>(it); auto llvmOperand = std::get<1>(it); if (operand.getType().isa()) { UnrankedMemRefDescriptor::unpack(builder, loc, llvmOperand, promotedOperands); continue; } if (auto memrefType = operand.getType().dyn_cast()) { MemRefDescriptor::unpack(builder, loc, llvmOperand, operand.getType().cast(), promotedOperands); continue; } promotedOperands.push_back(operand); } return promotedOperands; } namespace { /// A pass converting MLIR operations into the LLVM IR dialect. struct LLVMLoweringPass : public ModulePass { /// Creates an LLVM lowering pass. explicit LLVMLoweringPass(bool useAlloca, bool useBarePtrCallConv, bool emitCWrappers) { this->useAlloca = useAlloca; this->useBarePtrCallConv = useBarePtrCallConv; this->emitCWrappers = emitCWrappers; } explicit LLVMLoweringPass() {} LLVMLoweringPass(const LLVMLoweringPass &pass) {} /// Run the dialect converter on the module. void runOnModule() override { if (useBarePtrCallConv && emitCWrappers) { getModule().emitError() << "incompatible conversion options: bare-pointer calling convention " "and C wrapper emission"; signalPassFailure(); return; } ModuleOp m = getModule(); LLVM::ensureDistinctSuccessors(m); LLVMTypeConverterCustomization customs; customs.funcArgConverter = useBarePtrCallConv ? 
barePtrFuncArgTypeConverter : structFuncArgTypeConverter; LLVMTypeConverter typeConverter(&getContext(), customs); OwningRewritePatternList patterns; if (useBarePtrCallConv) populateStdToLLVMBarePtrConversionPatterns(typeConverter, patterns, useAlloca); else populateStdToLLVMConversionPatterns(typeConverter, patterns, useAlloca, emitCWrappers); ConversionTarget target(getContext()); target.addLegalDialect(); if (failed(applyPartialConversion(m, target, patterns, &typeConverter))) signalPassFailure(); } /// Use `alloca` instead of `call @malloc` for converting std.alloc. Option useAlloca{ *this, "use-alloca", llvm::cl::desc("Replace emission of malloc/free by alloca"), llvm::cl::init(false)}; /// Convert memrefs to bare pointers in function signatures. Option useBarePtrCallConv{ *this, "use-bare-ptr-memref-call-conv", llvm::cl::desc("Replace FuncOp's MemRef arguments with " "bare pointers to the MemRef element types"), llvm::cl::init(false)}; /// Emit wrappers for C-compatible pointer-to-struct memref descriptors. Option emitCWrappers{ *this, "emit-c-wrappers", llvm::cl::desc("Emit C-compatible wrapper functions"), llvm::cl::init(false)}; }; } // end namespace std::unique_ptr> mlir::createLowerToLLVMPass(bool useAlloca, bool useBarePtrCallConv, bool emitCWrappers) { return std::make_unique(useAlloca, useBarePtrCallConv, emitCWrappers); } static PassRegistration pass(PASS_NAME, "Convert scalar and vector operations from the " "Standard to the LLVM dialect"); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 5c5fcfc47c11..65bd714d7881 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -1,2622 +1,2658 @@ //===- Ops.cpp - Standard MLIR Operations ---------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/StandardOps/IR/Ops.h"

#include "mlir/Dialect/CommonFolders.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Function.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/Module.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/MathExtras.h"
#include "mlir/Support/STLExtras.h"
#include "mlir/Transforms/InliningUtils.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

// Pull in all enum type definitions and utility function declarations.
#include "mlir/Dialect/StandardOps/IR/OpsEnums.cpp.inc"

using namespace mlir;

//===----------------------------------------------------------------------===//
// StandardOpsDialect Interfaces
//===----------------------------------------------------------------------===//
namespace {
/// This class defines the interface for handling inlining with standard
/// operations. It declares every operation legal to inline and rewrites the
/// terminators of inlined regions (see the two `handleTerminator` overloads).
struct StdInlinerInterface : public DialectInlinerInterface {
  using DialectInlinerInterface::DialectInlinerInterface;

  //===--------------------------------------------------------------------===//
  // Analysis Hooks
  //===--------------------------------------------------------------------===//

  /// All operations within standard ops can be inlined: the hook ignores its
  /// arguments and unconditionally returns true.
  bool isLegalToInline(Operation *, Region *,
                       BlockAndValueMapping &) const final {
    return true;
  }

  //===--------------------------------------------------------------------===//
  // Transformation Hooks
  //===--------------------------------------------------------------------===//

  /// Handle the given inlined terminator by replacing it with a new operation
  /// as necessary. This overload receives the block `newDest` that control
  /// should continue in; a return is replaced by a branch to `newDest` that
  /// forwards the return operands. Any other terminator is left untouched.
  void handleTerminator(Operation *op, Block *newDest) const final {
    // Only "std.return" needs to be handled here.
    auto returnOp = dyn_cast(op);
    if (!returnOp)
      return;

    // Replace the return with a branch to the dest.
    OpBuilder builder(op);
    builder.create(op->getLoc(), newDest, returnOp.getOperands());
    op->erase();
  }

  /// Handle the given inlined terminator by replacing it with a new operation
  /// as necessary. This overload receives the values `valuesToRepl` that must
  /// be replaced; the i-th value has all of its uses replaced by the i-th
  /// return operand. The terminator itself is not erased here.
  void handleTerminator(Operation *op,
                        ArrayRef valuesToRepl) const final {
    // Only "std.return" needs to be handled here. Unlike the overload above,
    // this uses an asserting `cast`, so `op` is required to be a return.
    auto returnOp = cast(op);

    // Replace the values directly with the return operands.
    assert(returnOp.getNumOperands() == valuesToRepl.size());
    for (const auto &it : llvm::enumerate(returnOp.getOperands()))
      valuesToRepl[it.index()].replaceAllUsesWith(it.value());
  }
};
} // end anonymous namespace

//===----------------------------------------------------------------------===//
// StandardOpsDialect
//===----------------------------------------------------------------------===//

/// A custom unary operation printer that omits the "std." prefix from the
/// operation names. Prints `<name> <operand> <attr-dict> : <operand-type>`.
static void printStandardUnaryOp(Operation *op, OpAsmPrinter &p) {
  assert(op->getNumOperands() == 1 && "unary op should have one operand");
  assert(op->getNumResults() == 1 && "unary op should have one result");

  // Length of the dialect namespace plus the '.' separator that follows it.
  int stdDotLen = StandardOpsDialect::getDialectNamespace().size() + 1;
  p << op->getName().getStringRef().drop_front(stdDotLen) << ' '
    << op->getOperand(0);
  p.printOptionalAttrDict(op->getAttrs());
  p << " : " << op->getOperand(0).getType();
}

/// A custom binary operation printer that omits the "std." prefix from the
/// operation names.
static void printStandardBinaryOp(Operation *op, OpAsmPrinter &p) {
  assert(op->getNumOperands() == 2 && "binary op should have two operands");
  assert(op->getNumResults() == 1 && "binary op should have one result");

  // If not all the operand and result types are the same, just use the
  // generic assembly form to avoid omitting information in printing.
  auto resultType = op->getResult(0).getType();
  if (op->getOperand(0).getType() != resultType ||
      op->getOperand(1).getType() != resultType) {
    p.printGenericOp(op);
    return;
  }

  // Length of the dialect namespace plus the '.' separator that follows it.
  int stdDotLen = StandardOpsDialect::getDialectNamespace().size() + 1;
  p << op->getName().getStringRef().drop_front(stdDotLen) << ' '
    << op->getOperand(0) << ", " << op->getOperand(1);
  p.printOptionalAttrDict(op->getAttrs());

  // Now we can output only one type for all operands and the result.
  p << " : " << op->getResult(0).getType();
}

/// A custom cast operation printer that omits the "std." prefix from the
/// operation names. Prints `<name> <operand> : <operand-type> to
/// <result-type>`; attributes are not printed by this form.
static void printStandardCastOp(Operation *op, OpAsmPrinter &p) {
  int stdDotLen = StandardOpsDialect::getDialectNamespace().size() + 1;
  p << op->getName().getStringRef().drop_front(stdDotLen) << ' '
    << op->getOperand(0) << " : " << op->getOperand(0).getType() << " to "
    << op->getResult(0).getType();
}

/// A custom cast operation verifier. Delegates the compatibility decision to
/// the static `T::areCastCompatible(operandType, resultType)` hook of the
/// concrete cast op and emits an error naming both types when it fails.
-template static LogicalResult verifyCastOp(T op) {
+template
+static LogicalResult verifyCastOp(T op) {
  auto opType = op.getOperand().getType();
  auto resType = op.getType();
  if (!T::areCastCompatible(opType, resType))
    return op.emitError("operand type ") << opType << " and result type "
                                         << resType << " are cast incompatible";

  return success();
}

/// Construct the dialect: registers its operations and its interfaces with
/// the given context.
StandardOpsDialect::StandardOpsDialect(MLIRContext *context)
    : Dialect(getDialectNamespace(), context) {
  addOperations();
  addInterfaces();
}

/// Materialize a single constant operation from a given attribute value with
/// the desired resultant type.
Operation *StandardOpsDialect::materializeConstant(OpBuilder &builder,
                                                   Attribute value, Type type,
                                                   Location loc) {
  return builder.create(loc, type, value);
}

/// Prints the operand range [begin, end) as a dimension list followed by an
/// optional symbol list: the first `numDims` operands are printed inside
/// parentheses, and the remaining operands (if any) inside square brackets.
void mlir::printDimAndSymbolList(Operation::operand_iterator begin,
                                 Operation::operand_iterator end,
                                 unsigned numDims, OpAsmPrinter &p) {
  Operation::operand_range operands(begin, end);
  p << '(' << operands.take_front(numDims) << ')';
  // Only emit the bracketed symbol list when there are operands beyond the
  // dimension operands.
  if (operands.size() != numDims)
    p << '[' << operands.drop_front(numDims) << ']';
}

// Parses dimension and symbol list, and sets 'numDims' to the number of
// dimension operands parsed. All parsed operands (dimensions first, then
// symbols) are resolved with index type and appended to 'operands'.
// Returns failure() if parsing or operand resolution fails, and success()
// otherwise.
ParseResult mlir::parseDimAndSymbolList(OpAsmParser &parser,
                                        SmallVectorImpl &operands,
                                        unsigned &numDims) {
  SmallVector opInfos;
  if (parser.parseOperandList(opInfos, OpAsmParser::Delimiter::Paren))
    return failure();
  // Store number of dimensions for validation by caller.
  numDims = opInfos.size();

  // Parse the optional symbol operands.
  auto indexTy = parser.getBuilder().getIndexType();
  if (parser.parseOperandList(opInfos,
                              OpAsmParser::Delimiter::OptionalSquare) ||
      parser.resolveOperands(opInfos, indexTy, operands))
    return failure();
  return success();
}

/// Matches a ConstantIndexOp.
/// TODO: This should probably just be a general matcher that uses m_Constant
/// and checks the operation for an index type.
static detail::op_matcher m_ConstantIndex() {
  return detail::op_matcher();
}

//===----------------------------------------------------------------------===//
// Common canonicalization pattern support logic
//===----------------------------------------------------------------------===//

/// This is a common helper used for patterns of the form
/// "someop(memrefcast) -> someop". It folds the source of any memref_cast
/// into the root operation directly.
static LogicalResult foldMemRefCast(Operation *op) {
  // Tracks whether at least one operand was rewritten; this decides the
  // success/failure result returned to the caller.
  bool folded = false;
  for (OpOperand &operand : op->getOpOperands()) {
    auto cast = dyn_cast_or_null(operand.get().getDefiningOp());
    // NOTE(review): the type tested by `isa` is not visible in this view;
    // presumably it excludes casts whose source is an unranked memref --
    // confirm against the full source.
    if (cast && !cast.getOperand().getType().isa()) {
      operand.set(cast.getOperand());
      folded = true;
    }
  }
  return success(folded);
}

//===----------------------------------------------------------------------===//
// AddFOp
//===----------------------------------------------------------------------===//

/// Constant folds a floating point addition when both operands are constant.
OpFoldResult AddFOp::fold(ArrayRef operands) {
  return constFoldBinaryOp(
      operands, [](APFloat a, APFloat b) { return a + b; });
}

//===----------------------------------------------------------------------===//
// AddIOp
//===----------------------------------------------------------------------===//

/// Folds `addi(x, 0)` to `x`, and otherwise constant folds the addition via
/// `constFoldBinaryOp`.
OpFoldResult AddIOp::fold(ArrayRef operands) {
  /// addi(x, 0) -> x
  if (matchPattern(rhs(), m_Zero()))
    return lhs();

  return constFoldBinaryOp(operands,
                           [](APInt a, APInt b) { return a + b; });
}

//===----------------------------------------------------------------------===//
// AllocOp
//===----------------------------------------------------------------------===//

/// Prints `alloc(<dims>)[<symbols>] <attr-dict> : <memref-type>`, eliding the
/// "map" attribute from the attribute dictionary.
static void print(OpAsmPrinter &p, AllocOp op) {
  p << "alloc";

  // Print dynamic dimension operands.
  MemRefType type = op.getType();
  printDimAndSymbolList(op.operand_begin(), op.operand_end(),
                        type.getNumDynamicDims(), p);
  p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{"map"});
  p << " : " << type;
}

/// Parses the custom form produced by the AllocOp printer above and checks
/// that the number of dimension operands equals the number of dynamic
/// dimensions of the parsed memref type.
static ParseResult parseAllocOp(OpAsmParser &parser, OperationState &result) {
  MemRefType type;

  // Parse the dimension operands and optional symbol operands, followed by a
  // memref type.
  unsigned numDimOperands;
  if (parseDimAndSymbolList(parser, result.operands, numDimOperands) ||
      parser.parseOptionalAttrDict(result.attributes) ||
      parser.parseColonType(type))
    return failure();

  // Check numDynamicDims against number of question marks in memref type.
  // Note: this check remains here (instead of in verify()), because the
  // partition between dim operands and symbol operands is lost after parsing.
  // Verification still checks that the total number of operands matches
  // the number of symbols in the affine map, plus the number of dynamic
  // dimensions in the memref.
  if (numDimOperands != type.getNumDynamicDims())
    return parser.emitError(parser.getNameLoc())
           << "dimension operand count does not equal memref dynamic dimension "
              "count";
  result.types.push_back(type);
  return success();
}

/// Verifies that the result is a memref, that the operand count equals the
/// number of dynamic dimensions plus the number of symbols of the first
/// affine map of the memref type, and that every operand has index type.
static LogicalResult verify(AllocOp op) {
  auto memRefType = op.getResult().getType().dyn_cast();
  if (!memRefType)
    return op.emitOpError("result must be a memref");

  unsigned numSymbols = 0;
  if (!memRefType.getAffineMaps().empty()) {
    // Store number of symbols used in affine map (used in subsequent check).
    // Only the first affine map of the layout is consulted.
    AffineMap affineMap = memRefType.getAffineMaps()[0];
    numSymbols = affineMap.getNumSymbols();
  }

  // Check that the total number of operands matches the number of symbols in
  // the affine map, plus the number of dynamic dimensions specified in the
  // memref type.
  unsigned numDynamicDims = memRefType.getNumDynamicDims();
  if (op.getNumOperands() != numDynamicDims + numSymbols)
    return op.emitOpError(
        "operand count does not equal dimension plus symbol operand count");

  // Verify that all operands are of type Index.
  for (auto operandType : op.getOperandTypes())
    if (!operandType.isIndex())
      return op.emitOpError("requires operands to be of type Index");
  return success();
}

namespace {
/// Fold constant dimensions into an alloc operation. The alloc is replaced by
/// a new alloc with a more static memref type, followed by a cast back to the
/// original type so that existing users keep seeing the same type.
struct SimplifyAllocConst : public OpRewritePattern {
  using OpRewritePattern::OpRewritePattern;

  PatternMatchResult matchAndRewrite(AllocOp alloc,
                                     PatternRewriter &rewriter) const override {
    // Check to see if any dimensions operands are constants.  If so, we can
    // substitute and drop them.
    if (llvm::none_of(alloc.getOperands(), [](Value operand) {
          return matchPattern(operand, m_ConstantIndex());
        }))
      return matchFailure();

    auto memrefType = alloc.getType();

    // Ok, we have one or more constant operands.  Collect the non-constant ones
    // and keep track of the resultant memref type to build.
    SmallVector newShapeConstants;
    newShapeConstants.reserve(memrefType.getRank());
    SmallVector newOperands;

    // Index of the operand that supplies the next dynamic dimension; it only
    // advances when a dynamic dimension is visited.
    // NOTE(review): only operands addressed through `dynamicDimPos` are
    // copied to `newOperands`; operands beyond the dynamic dimensions
    // (symbol operands) do not appear to be forwarded to the new alloc --
    // confirm this is intended.
    unsigned dynamicDimPos = 0;
    for (unsigned dim = 0, e = memrefType.getRank(); dim < e; ++dim) {
      int64_t dimSize = memrefType.getDimSize(dim);
      // If this is already static dimension, keep it.
      if (dimSize != -1) {
        newShapeConstants.push_back(dimSize);
        continue;
      }
      auto *defOp = alloc.getOperand(dynamicDimPos).getDefiningOp();
      if (auto constantIndexOp = dyn_cast_or_null(defOp)) {
        // Dynamic shape dimension will be folded.
        newShapeConstants.push_back(constantIndexOp.getValue());
      } else {
        // Dynamic shape dimension not folded; copy operand from old memref.
        newShapeConstants.push_back(-1);
        newOperands.push_back(alloc.getOperand(dynamicDimPos));
      }
      dynamicDimPos++;
    }

    // Create new memref type (which will have fewer dynamic dimensions).
    MemRefType newMemRefType =
        MemRefType::Builder(memrefType).setShape(newShapeConstants);
    assert(static_cast(newOperands.size()) ==
           newMemRefType.getNumDynamicDims());

    // Create and insert the alloc op for the new memref.
    auto newAlloc = rewriter.create(alloc.getLoc(), newMemRefType,
                                    newOperands, IntegerAttr());
    // Insert a cast so we have the same type as the old alloc.
    auto resultCast = rewriter.create(alloc.getLoc(), newAlloc,
                                      alloc.getType());

    rewriter.replaceOp(alloc, {resultCast});
    return matchSuccess();
  }
};

/// Fold alloc operations with no uses. Alloc has side effects on the heap,
/// but can still be deleted if it has zero uses.
struct SimplifyDeadAlloc : public OpRewritePattern {
  using OpRewritePattern::OpRewritePattern;

  PatternMatchResult matchAndRewrite(AllocOp alloc,
                                     PatternRewriter &rewriter) const override {
    // An alloc whose result has no uses is simply erased.
    if (alloc.use_empty()) {
      rewriter.eraseOp(alloc);
      return matchSuccess();
    }
    return matchFailure();
  }
};
} // end anonymous namespace.

/// Registers the AllocOp canonicalization patterns with `results`.
void AllocOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
                                          MLIRContext *context) {
  results.insert(context);
}

//===----------------------------------------------------------------------===//
// BranchOp
//===----------------------------------------------------------------------===//

namespace {
/// Simplify a branch to a block that has a single predecessor. This effectively
/// merges the two blocks.
struct SimplifyBrToBlockWithSinglePred : public OpRewritePattern {
  using OpRewritePattern::OpRewritePattern;

  PatternMatchResult matchAndRewrite(BranchOp op,
                                     PatternRewriter &rewriter) const override {
    // Check that the successor block has a single predecessor. A branch back
    // to its own block is also rejected.
    Block *succ = op.getDest();
    Block *opParent = op.getOperation()->getBlock();
    if (succ == opParent || !has_single_element(succ->getPredecessors()))
      return matchFailure();

    // Merge the successor into the current block and erase the branch. The
    // branch operands are passed along as the values for the merged block.
    rewriter.mergeBlocks(succ, opParent, op.getOperands());
    rewriter.eraseOp(op);
    return matchSuccess();
  }
};
} // end anonymous namespace.

/// Returns the destination block, i.e. successor 0 of the branch.
Block *BranchOp::getDest() { return getSuccessor(0); }

/// Sets successor 0 of the branch to `block`.
void BranchOp::setDest(Block *block) { return setSuccessor(block, 0); }

/// Erases the operand at `index` from the operands passed to successor 0.
void BranchOp::eraseOperand(unsigned index) {
  getOperation()->eraseSuccessorOperand(0, index);
}

/// Registers the BranchOp canonicalization patterns with `results`.
void BranchOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
                                           MLIRContext *context) {
  results.insert(context);
}

//===----------------------------------------------------------------------===//
// CallOp
//===----------------------------------------------------------------------===//

/// Verifies that the op carries a 'callee' symbol reference that resolves in
/// the enclosing parent op used for symbol lookup, and that the operand and
/// result counts and types match the callee's function type.
static LogicalResult verify(CallOp op) {
  // Check that the callee attribute was specified.
  auto fnAttr = op.getAttrOfType("callee");
  if (!fnAttr)
    return op.emitOpError("requires a 'callee' symbol reference attribute");
  auto fn =
      op.getParentOfType().lookupSymbol(fnAttr.getValue());
  if (!fn)
    return op.emitOpError() << "'" << fnAttr.getValue()
                            << "' does not reference a valid function";

  // Verify that the operand and result types match the callee.
  auto fnType = fn.getType();
  if (fnType.getNumInputs() != op.getNumOperands())
    return op.emitOpError("incorrect number of operands for callee");

  for (unsigned i = 0, e = fnType.getNumInputs(); i != e; ++i)
    if (op.getOperand(i).getType() != fnType.getInput(i))
      return op.emitOpError("operand type mismatch");

  if (fnType.getNumResults() != op.getNumResults())
    return op.emitOpError("incorrect number of results for callee");

  for (unsigned i = 0, e = fnType.getNumResults(); i != e; ++i)
    if (op.getResult(i).getType() != fnType.getResult(i))
      return op.emitOpError("result type mismatch");

  return success();
}

/// Builds a function type from this call's own operand and result types (the
/// callee symbol is not consulted).
FunctionType CallOp::getCalleeType() {
  SmallVector argTypes(getOperandTypes());
  return FunctionType::get(argTypes, getResultTypes(), getContext());
}

//===----------------------------------------------------------------------===//
// CallIndirectOp
//===----------------------------------------------------------------------===//
namespace {
/// Fold indirect calls that have a constant function as the callee operand.
struct SimplifyIndirectCallWithKnownCallee : public OpRewritePattern {
  using OpRewritePattern::OpRewritePattern;

  PatternMatchResult matchAndRewrite(CallIndirectOp indirectCall,
                                     PatternRewriter &rewriter) const override {
    // Check that the callee is a constant callee. On a match, `calledFn` is
    // bound to the symbol reference of the constant.
    SymbolRefAttr calledFn;
    if (!matchPattern(indirectCall.getCallee(), m_Constant(&calledFn)))
      return matchFailure();

    // Replace with a direct call.
    rewriter.replaceOpWithNewOp(indirectCall, calledFn,
                                indirectCall.getResultTypes(),
                                indirectCall.getArgOperands());
    return matchSuccess();
  }
};
} // end anonymous namespace.
/// Registers the CallIndirectOp canonicalization patterns with `results`.
void CallIndirectOp::getCanonicalizationPatterns(
    OwningRewritePatternList &results, MLIRContext *context) {
  results.insert(context);
}

//===----------------------------------------------------------------------===//
// General helpers for comparison ops
//===----------------------------------------------------------------------===//

// Return the type of the same shape (scalar, vector or tensor) containing i1.
// Returns a null Type when `type` is none of the handled kinds, so callers
// must check the result before using it.
static Type getCheckedI1SameShape(Type type) {
  auto i1Type = IntegerType::get(1, type.getContext());
  // Scalars map to a plain i1.
  if (type.isSignlessIntOrIndexOrFloat())
    return i1Type;
  // Ranked tensors and vectors keep their shape; the unranked tensor case
  // produces an unranked tensor of i1.
  if (auto tensorType = type.dyn_cast())
    return RankedTensorType::get(tensorType.getShape(), i1Type);
  if (type.isa())
    return UnrankedTensorType::get(i1Type);
  if (auto vectorType = type.dyn_cast())
    return VectorType::get(vectorType.getShape(), i1Type);
  return Type();
}

// Same as getCheckedI1SameShape, but asserts that the input type is one of
// the handled kinds instead of returning a null Type.
static Type getI1SameShape(Type type) {
  Type res = getCheckedI1SameShape(type);
  assert(res && "expected type with valid i1 shape");
  return res;
}

//===----------------------------------------------------------------------===//
// CmpIOp
//===----------------------------------------------------------------------===//

// Populates `result` for a cmpi: operands {lhs, rhs}, a result type that is
// the i1-shaped counterpart of the type of `lhs`, and the predicate stored
// as a 64-bit integer attribute.
static void buildCmpIOp(Builder *build, OperationState &result,
                        CmpIPredicate predicate, Value lhs, Value rhs) {
  result.addOperands({lhs, rhs});
  result.types.push_back(getI1SameShape(lhs.getType()));
  result.addAttribute(
      CmpIOp::getPredicateAttrName(),
      build->getI64IntegerAttr(static_cast(predicate)));
}

// Compute `lhs` `pred` `rhs`, where `pred` is one of the known integer
// comparison predicates.
static bool applyCmpPredicate(CmpIPredicate predicate, const APInt &lhs,
                              const APInt &rhs) {
  // The `s*` predicates use the signed APInt comparisons and the `u*`
  // predicates the unsigned ones; `eq`/`ne` do not depend on signedness.
  switch (predicate) {
  case CmpIPredicate::eq:
    return lhs.eq(rhs);
  case CmpIPredicate::ne:
    return lhs.ne(rhs);
  case CmpIPredicate::slt:
    return lhs.slt(rhs);
  case CmpIPredicate::sle:
    return lhs.sle(rhs);
  case CmpIPredicate::sgt:
    return lhs.sgt(rhs);
  case CmpIPredicate::sge:
    return lhs.sge(rhs);
  case CmpIPredicate::ult:
    return lhs.ult(rhs);
  case CmpIPredicate::ule:
    return lhs.ule(rhs);
  case CmpIPredicate::ugt:
    return lhs.ugt(rhs);
  case CmpIPredicate::uge:
    return lhs.uge(rhs);
  }
  llvm_unreachable("unknown comparison predicate");
}

// Constant folding hook for comparisons. Folds only when both operands are
// constant attributes of the expected kind; the result is a 1-bit integer
// attribute holding the outcome of the comparison.
OpFoldResult CmpIOp::fold(ArrayRef operands) {
  assert(operands.size() == 2 && "cmpi takes two arguments");

  auto lhs = operands.front().dyn_cast_or_null();
  auto rhs = operands.back().dyn_cast_or_null();
  if (!lhs || !rhs)
    return {};

  auto val = applyCmpPredicate(getPredicate(), lhs.getValue(), rhs.getValue());
  return IntegerAttr::get(IntegerType::get(1, getContext()), APInt(1, val));
}

//===----------------------------------------------------------------------===//
// CmpFOp
//===----------------------------------------------------------------------===//

// Returns an array of mnemonics for CmpFPredicates indexed by values thereof.
// The order of the entries must match the numeric values of the enumerators;
// the static_assert below only guards the number of entries.
static inline const char *const *getCmpFPredicateNames() {
  static const char *predicateNames[] = {
      /*AlwaysFalse*/ "false",
      /*OEQ*/ "oeq",
      /*OGT*/ "ogt",
      /*OGE*/ "oge",
      /*OLT*/ "olt",
      /*OLE*/ "ole",
      /*ONE*/ "one",
      /*ORD*/ "ord",
      /*UEQ*/ "ueq",
      /*UGT*/ "ugt",
      /*UGE*/ "uge",
      /*ULT*/ "ult",
      /*ULE*/ "ule",
      /*UNE*/ "une",
      /*UNO*/ "uno",
      /*AlwaysTrue*/ "true",
  };
  static_assert(std::extent::value ==
                    (size_t)CmpFPredicate::NumPredicates,
                "wrong number of predicate names");
  return predicateNames;
}

// Returns a value of the predicate corresponding to the given mnemonic.
// Returns NumPredicates (one-past-end) if there is no such mnemonic.
CmpFPredicate CmpFOp::getPredicateByName(StringRef name) {
  return llvm::StringSwitch(name)
      .Case("false", CmpFPredicate::AlwaysFalse)
      .Case("oeq", CmpFPredicate::OEQ)
      .Case("ogt", CmpFPredicate::OGT)
      .Case("oge", CmpFPredicate::OGE)
      .Case("olt", CmpFPredicate::OLT)
      .Case("ole", CmpFPredicate::OLE)
      .Case("one", CmpFPredicate::ONE)
      .Case("ord", CmpFPredicate::ORD)
      .Case("ueq", CmpFPredicate::UEQ)
      .Case("ugt", CmpFPredicate::UGT)
      .Case("uge", CmpFPredicate::UGE)
      .Case("ult", CmpFPredicate::ULT)
      .Case("ule", CmpFPredicate::ULE)
      .Case("une", CmpFPredicate::UNE)
      .Case("uno", CmpFPredicate::UNO)
      .Case("true", CmpFPredicate::AlwaysTrue)
      .Default(CmpFPredicate::NumPredicates);
}

// Populates `result` for a cmpf: operands {lhs, rhs}, a result type that is
// the i1-shaped counterpart of the type of `lhs`, and the predicate stored
// as a 64-bit integer attribute.
static void buildCmpFOp(Builder *build, OperationState &result,
                        CmpFPredicate predicate, Value lhs, Value rhs) {
  result.addOperands({lhs, rhs});
  result.types.push_back(getI1SameShape(lhs.getType()));
  result.addAttribute(
      CmpFOp::getPredicateAttrName(),
      build->getI64IntegerAttr(static_cast(predicate)));
}

// Parses `cmpf "<predicate>", <lhs>, <rhs> <attr-dict> : <type>`. The
// predicate is written as a string mnemonic in the textual form and is
// rewritten to its integer enum value before being stored on the operation.
static ParseResult parseCmpFOp(OpAsmParser &parser, OperationState &result) {
  SmallVector ops;
  SmallVector attrs;
  Attribute predicateNameAttr;
  Type type;
  if (parser.parseAttribute(predicateNameAttr, CmpFOp::getPredicateAttrName(),
                            attrs) ||
      parser.parseComma() || parser.parseOperandList(ops, 2) ||
      parser.parseOptionalAttrDict(attrs) || parser.parseColonType(type) ||
      parser.resolveOperands(ops, type, result.operands))
    return failure();

  if (!predicateNameAttr.isa())
    return parser.emitError(parser.getNameLoc(),
                            "expected string comparison predicate attribute");

  // Rewrite string attribute to an enum value.
  StringRef predicateName = predicateNameAttr.cast().getValue();
  auto predicate = CmpFOp::getPredicateByName(predicateName);
  if (predicate == CmpFPredicate::NumPredicates)
    return parser.emitError(parser.getNameLoc(),
                            "unknown comparison predicate \"" + predicateName +
                                "\"");

  auto builder = parser.getBuilder();
  // Use the checked variant so that an unsupported operand type becomes a
  // parse error instead of tripping the assertion in getI1SameShape.
  Type i1Type = getCheckedI1SameShape(type);
  if (!i1Type)
    return parser.emitError(parser.getNameLoc(),
                            "expected type with valid i1 shape");

  // The predicate attribute was the first one parsed into `attrs`, so it sits
  // at index 0; overwrite its string value with the integer enum value.
  attrs[0].second = builder.getI64IntegerAttr(static_cast(predicate));
  result.attributes = attrs;

  result.addTypes({i1Type});
  return success();
}

// Prints `cmpf "<predicate>", <lhs>, <rhs> <attr-dict> : <lhs-type>`, mapping
// the stored integer predicate back to its mnemonic and eliding the predicate
// attribute from the attribute dictionary.
static void print(OpAsmPrinter &p, CmpFOp op) {
  p << "cmpf ";

  auto predicateValue =
      op.getAttrOfType(CmpFOp::getPredicateAttrName()).getInt();
  assert(predicateValue >= static_cast(CmpFPredicate::FirstValidValue) &&
         predicateValue < static_cast(CmpFPredicate::NumPredicates) &&
         "unknown predicate index");
  p << '"' << getCmpFPredicateNames()[predicateValue] << '"' << ", " << op.lhs()
    << ", " << op.rhs();
  p.printOptionalAttrDict(op.getAttrs(),
                          /*elidedAttrs=*/{CmpFOp::getPredicateAttrName()});
  p << " : " << op.lhs().getType();
}

// Verifies that the predicate attribute is present and that its value lies
// in the half-open range [FirstValidValue, NumPredicates).
static LogicalResult verify(CmpFOp op) {
  auto predicateAttr = op.getAttrOfType(CmpFOp::getPredicateAttrName());
  if (!predicateAttr)
    return op.emitOpError("requires an integer attribute named 'predicate'");
  auto predicate = predicateAttr.getInt();
  if (predicate < (int64_t)CmpFPredicate::FirstValidValue ||
      predicate >= (int64_t)CmpFPredicate::NumPredicates)
    return op.emitOpError("'predicate' attribute value out of range");

  return success();
}

// Compute `lhs` `pred` `rhs`, where `pred` is one of the known floating point
// comparison predicates.
static bool applyCmpPredicate(CmpFPredicate predicate, const APFloat &lhs, const APFloat &rhs) { auto cmpResult = lhs.compare(rhs); switch (predicate) { case CmpFPredicate::AlwaysFalse: return false; case CmpFPredicate::OEQ: return cmpResult == APFloat::cmpEqual; case CmpFPredicate::OGT: return cmpResult == APFloat::cmpGreaterThan; case CmpFPredicate::OGE: return cmpResult == APFloat::cmpGreaterThan || cmpResult == APFloat::cmpEqual; case CmpFPredicate::OLT: return cmpResult == APFloat::cmpLessThan; case CmpFPredicate::OLE: return cmpResult == APFloat::cmpLessThan || cmpResult == APFloat::cmpEqual; case CmpFPredicate::ONE: return cmpResult != APFloat::cmpUnordered && cmpResult != APFloat::cmpEqual; case CmpFPredicate::ORD: return cmpResult != APFloat::cmpUnordered; case CmpFPredicate::UEQ: return cmpResult == APFloat::cmpUnordered || cmpResult == APFloat::cmpEqual; case CmpFPredicate::UGT: return cmpResult == APFloat::cmpUnordered || cmpResult == APFloat::cmpGreaterThan; case CmpFPredicate::UGE: return cmpResult == APFloat::cmpUnordered || cmpResult == APFloat::cmpGreaterThan || cmpResult == APFloat::cmpEqual; case CmpFPredicate::ULT: return cmpResult == APFloat::cmpUnordered || cmpResult == APFloat::cmpLessThan; case CmpFPredicate::ULE: return cmpResult == APFloat::cmpUnordered || cmpResult == APFloat::cmpLessThan || cmpResult == APFloat::cmpEqual; case CmpFPredicate::UNE: return cmpResult != APFloat::cmpEqual; case CmpFPredicate::UNO: return cmpResult == APFloat::cmpUnordered; case CmpFPredicate::AlwaysTrue: return true; default: llvm_unreachable("unknown comparison predicate"); } } // Constant folding hook for comparisons. OpFoldResult CmpFOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "cmpf takes two arguments"); auto lhs = operands.front().dyn_cast_or_null(); auto rhs = operands.back().dyn_cast_or_null(); // TODO(gcmn) We could actually do some intelligent things if we know only one // of the operands, but it's inf or nan. 
if (!lhs || !rhs) return {}; auto val = applyCmpPredicate(getPredicate(), lhs.getValue(), rhs.getValue()); return IntegerAttr::get(IntegerType::get(1, getContext()), APInt(1, val)); } //===----------------------------------------------------------------------===// // CondBranchOp //===----------------------------------------------------------------------===// namespace { /// cond_br true, ^bb1, ^bb2 -> br ^bb1 /// cond_br false, ^bb1, ^bb2 -> br ^bb2 /// struct SimplifyConstCondBranchPred : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(CondBranchOp condbr, PatternRewriter &rewriter) const override { if (matchPattern(condbr.getCondition(), m_NonZero())) { // True branch taken. rewriter.replaceOpWithNewOp(condbr, condbr.getTrueDest(), condbr.getTrueOperands()); return matchSuccess(); } else if (matchPattern(condbr.getCondition(), m_Zero())) { // False branch taken. rewriter.replaceOpWithNewOp(condbr, condbr.getFalseDest(), condbr.getFalseOperands()); return matchSuccess(); } return matchFailure(); } }; } // end anonymous namespace. void CondBranchOp::getCanonicalizationPatterns( OwningRewritePatternList &results, MLIRContext *context) { results.insert(context); } //===----------------------------------------------------------------------===// // Constant*Op //===----------------------------------------------------------------------===// static void print(OpAsmPrinter &p, ConstantOp &op) { p << "constant "; p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{"value"}); if (op.getAttrs().size() > 1) p << ' '; p << op.getValue(); // If the value is a symbol reference, print a trailing type. 
if (op.getValue().isa()) p << " : " << op.getType(); } static ParseResult parseConstantOp(OpAsmParser &parser, OperationState &result) { Attribute valueAttr; if (parser.parseOptionalAttrDict(result.attributes) || parser.parseAttribute(valueAttr, "value", result.attributes)) return failure(); // If the attribute is a symbol reference, then we expect a trailing type. Type type; if (!valueAttr.isa()) type = valueAttr.getType(); else if (parser.parseColonType(type)) return failure(); // Add the attribute type to the list. return parser.addTypeToList(type, result.types); } /// The constant op requires an attribute, and furthermore requires that it /// matches the return type. static LogicalResult verify(ConstantOp &op) { auto value = op.getValue(); if (!value) return op.emitOpError("requires a 'value' attribute"); auto type = op.getType(); if (!value.getType().isa() && type != value.getType()) return op.emitOpError() << "requires attribute's type (" << value.getType() << ") to match op's return type (" << type << ")"; if (type.isa() || value.isa()) return success(); if (auto intAttr = value.dyn_cast()) { // If the type has a known bitwidth we verify that the value can be // represented with the given bitwidth. auto bitwidth = type.cast().getWidth(); auto intVal = intAttr.getValue(); if (!intVal.isSignedIntN(bitwidth) && !intVal.isIntN(bitwidth)) return op.emitOpError("requires 'value' to be an integer within the " "range of the integer result type"); return success(); } if (type.isa()) { if (!value.isa()) return op.emitOpError("requires 'value' to be a floating point constant"); return success(); } if (type.isa()) { if (!value.isa()) return op.emitOpError("requires 'value' to be a shaped constant"); return success(); } if (type.isa()) { auto fnAttr = value.dyn_cast(); if (!fnAttr) return op.emitOpError("requires 'value' to be a function reference"); // Try to find the referenced function. 
auto fn = op.getParentOfType().lookupSymbol(fnAttr.getValue()); if (!fn) return op.emitOpError("reference to undefined function 'bar'"); // Check that the referenced function has the correct type. if (fn.getType() != type) return op.emitOpError("reference to function with mismatched type"); return success(); } if (type.isa() && value.isa()) return success(); return op.emitOpError("unsupported 'value' attribute: ") << value; } OpFoldResult ConstantOp::fold(ArrayRef operands) { assert(operands.empty() && "constant has no operands"); return getValue(); } void ConstantOp::getAsmResultNames( function_ref setNameFn) { Type type = getType(); if (auto intCst = getValue().dyn_cast()) { IntegerType intTy = type.dyn_cast(); // Sugar i1 constants with 'true' and 'false'. if (intTy && intTy.getWidth() == 1) return setNameFn(getResult(), (intCst.getInt() ? "true" : "false")); // Otherwise, build a complex name with the value and type. SmallString<32> specialNameBuffer; llvm::raw_svector_ostream specialName(specialNameBuffer); specialName << 'c' << intCst.getInt(); if (intTy) specialName << '_' << type; setNameFn(getResult(), specialName.str()); } else if (type.isa()) { setNameFn(getResult(), "f"); } else { setNameFn(getResult(), "cst"); } } /// Returns true if a constant operation can be built with the given value and /// result type. bool ConstantOp::isBuildableWith(Attribute value, Type type) { // SymbolRefAttr can only be used with a function type. if (value.isa()) return type.isa(); // Otherwise, the attribute must have the same type as 'type'. if (value.getType() != type) return false; // Finally, check that the attribute kind is handled. 
return value.isa() || value.isa() || value.isa() || value.isa() || value.isa(); } void ConstantFloatOp::build(Builder *builder, OperationState &result, const APFloat &value, FloatType type) { ConstantOp::build(builder, result, type, builder->getFloatAttr(type, value)); } bool ConstantFloatOp::classof(Operation *op) { return ConstantOp::classof(op) && op->getResult(0).getType().isa(); } /// ConstantIntOp only matches values whose result type is an IntegerType. bool ConstantIntOp::classof(Operation *op) { return ConstantOp::classof(op) && op->getResult(0).getType().isSignlessInteger(); } void ConstantIntOp::build(Builder *builder, OperationState &result, int64_t value, unsigned width) { Type type = builder->getIntegerType(width); ConstantOp::build(builder, result, type, builder->getIntegerAttr(type, value)); } /// Build a constant int op producing an integer with the specified type, /// which must be an integer type. void ConstantIntOp::build(Builder *builder, OperationState &result, int64_t value, Type type) { assert(type.isSignlessInteger() && "ConstantIntOp can only have signless integer type"); ConstantOp::build(builder, result, type, builder->getIntegerAttr(type, value)); } /// ConstantIndexOp only matches values whose result type is Index. bool ConstantIndexOp::classof(Operation *op) { return ConstantOp::classof(op) && op->getResult(0).getType().isIndex(); } void ConstantIndexOp::build(Builder *builder, OperationState &result, int64_t value) { Type type = builder->getIndexType(); ConstantOp::build(builder, result, type, builder->getIntegerAttr(type, value)); } //===----------------------------------------------------------------------===// // DeallocOp //===----------------------------------------------------------------------===// namespace { /// Fold Dealloc operations that are deallocating an AllocOp that is only used /// by other Dealloc operations. 
struct SimplifyDeadDealloc : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(DeallocOp dealloc, PatternRewriter &rewriter) const override { // Check that the memref operand's defining operation is an AllocOp. Value memref = dealloc.memref(); if (!isa_and_nonnull(memref.getDefiningOp())) return matchFailure(); // Check that all of the uses of the AllocOp are other DeallocOps. for (auto *user : memref.getUsers()) if (!isa(user)) return matchFailure(); // Erase the dealloc operation. rewriter.eraseOp(dealloc); return matchSuccess(); } }; } // end anonymous namespace. static LogicalResult verify(DeallocOp op) { if (!op.memref().getType().isa()) return op.emitOpError("operand must be a memref"); return success(); } void DeallocOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { results.insert(context); } LogicalResult DeallocOp::fold(ArrayRef cstOperands, SmallVectorImpl &results) { /// dealloc(memrefcast) -> dealloc return foldMemRefCast(*this); } //===----------------------------------------------------------------------===// // DimOp //===----------------------------------------------------------------------===// static void print(OpAsmPrinter &p, DimOp op) { p << "dim " << op.getOperand() << ", " << op.getIndex(); p.printOptionalAttrDict(op.getAttrs(), /*elidedAttrs=*/{"index"}); p << " : " << op.getOperand().getType(); } static ParseResult parseDimOp(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType operandInfo; IntegerAttr indexAttr; Type type; Type indexType = parser.getBuilder().getIndexType(); return failure( parser.parseOperand(operandInfo) || parser.parseComma() || parser.parseAttribute(indexAttr, indexType, "index", result.attributes) || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(type) || parser.resolveOperand(operandInfo, type, result.operands) || parser.addTypeToList(indexType, result.types)); } static 
LogicalResult verify(DimOp op) { // Check that we have an integer index operand. auto indexAttr = op.getAttrOfType("index"); if (!indexAttr) return op.emitOpError("requires an integer attribute named 'index'"); int64_t index = indexAttr.getValue().getSExtValue(); auto type = op.getOperand().getType(); if (auto tensorType = type.dyn_cast()) { if (index >= tensorType.getRank()) return op.emitOpError("index is out of range"); } else if (auto memrefType = type.dyn_cast()) { if (index >= memrefType.getRank()) return op.emitOpError("index is out of range"); } else if (type.isa()) { // ok, assumed to be in-range. } else { return op.emitOpError("requires an operand with tensor or memref type"); } return success(); } OpFoldResult DimOp::fold(ArrayRef operands) { // Constant fold dim when the size along the index referred to is a constant. auto opType = memrefOrTensor().getType(); int64_t indexSize = -1; if (auto tensorType = opType.dyn_cast()) indexSize = tensorType.getShape()[getIndex()]; else if (auto memrefType = opType.dyn_cast()) indexSize = memrefType.getShape()[getIndex()]; if (!ShapedType::isDynamic(indexSize)) return IntegerAttr::get(IndexType::get(getContext()), indexSize); // Fold dim to the size argument for an AllocOp/ViewOp/SubViewOp. auto memrefType = opType.dyn_cast(); if (!memrefType) return {}; // The size at getIndex() is now a dynamic size of a memref. auto memref = memrefOrTensor().getDefiningOp(); if (auto alloc = dyn_cast_or_null(memref)) return *(alloc.getDynamicSizes().begin() + memrefType.getDynamicDimIndex(getIndex())); if (auto view = dyn_cast_or_null(memref)) return *(view.getDynamicSizes().begin() + memrefType.getDynamicDimIndex(getIndex())); // The subview op here is expected to have rank dynamic sizes now. 
if (auto subview = dyn_cast_or_null(memref)) { auto sizes = subview.sizes(); if (!sizes.empty()) return *(sizes.begin() + getIndex()); } /// dim(memrefcast) -> dim if (succeeded(foldMemRefCast(*this))) return getResult(); return {}; } //===----------------------------------------------------------------------===// // SignedDivIOp //===----------------------------------------------------------------------===// OpFoldResult SignedDivIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "binary operation takes two operands"); // Don't fold if it would overflow or if it requires a division by zero. bool overflowOrDiv0 = false; auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { if (overflowOrDiv0 || !b) { overflowOrDiv0 = true; return a; } return a.sdiv_ov(b, overflowOrDiv0); }); return overflowOrDiv0 ? Attribute() : result; } //===----------------------------------------------------------------------===// // UnsignedDivIOp //===----------------------------------------------------------------------===// OpFoldResult UnsignedDivIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "binary operation takes two operands"); // Don't fold if it would require a division by zero. bool div0 = false; auto result = constFoldBinaryOp(operands, [&](APInt a, APInt b) { if (div0 || !b) { div0 = true; return a; } return a.udiv(b); }); return div0 ? 
Attribute() : result; } // --------------------------------------------------------------------------- // DmaStartOp // --------------------------------------------------------------------------- void DmaStartOp::build(Builder *builder, OperationState &result, Value srcMemRef, ValueRange srcIndices, Value destMemRef, ValueRange destIndices, Value numElements, Value tagMemRef, ValueRange tagIndices, Value stride, Value elementsPerStride) { result.addOperands(srcMemRef); result.addOperands(srcIndices); result.addOperands(destMemRef); result.addOperands(destIndices); result.addOperands({numElements, tagMemRef}); result.addOperands(tagIndices); if (stride) result.addOperands({stride, elementsPerStride}); } void DmaStartOp::print(OpAsmPrinter &p) { p << "dma_start " << getSrcMemRef() << '[' << getSrcIndices() << "], " << getDstMemRef() << '[' << getDstIndices() << "], " << getNumElements() << ", " << getTagMemRef() << '[' << getTagIndices() << ']'; if (isStrided()) p << ", " << getStride() << ", " << getNumElementsPerStride(); p.printOptionalAttrDict(getAttrs()); p << " : " << getSrcMemRef().getType() << ", " << getDstMemRef().getType() << ", " << getTagMemRef().getType(); } // Parse DmaStartOp. // Ex: // %dma_id = dma_start %src[%i, %j], %dst[%k, %l], %size, // %tag[%index], %stride, %num_elt_per_stride : // : memref<3076 x f32, 0>, // memref<1024 x f32, 2>, // memref<1 x i32> // ParseResult DmaStartOp::parse(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType srcMemRefInfo; SmallVector srcIndexInfos; OpAsmParser::OperandType dstMemRefInfo; SmallVector dstIndexInfos; OpAsmParser::OperandType numElementsInfo; OpAsmParser::OperandType tagMemrefInfo; SmallVector tagIndexInfos; SmallVector strideInfo; SmallVector types; auto indexType = parser.getBuilder().getIndexType(); // Parse and resolve the following list of operands: // *) source memref followed by its indices (in square brackets). 
// *) destination memref followed by its indices (in square brackets). // *) dma size in KiB. if (parser.parseOperand(srcMemRefInfo) || parser.parseOperandList(srcIndexInfos, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseOperand(dstMemRefInfo) || parser.parseOperandList(dstIndexInfos, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseOperand(numElementsInfo) || parser.parseComma() || parser.parseOperand(tagMemrefInfo) || parser.parseOperandList(tagIndexInfos, OpAsmParser::Delimiter::Square)) return failure(); // Parse optional stride and elements per stride. if (parser.parseTrailingOperandList(strideInfo)) return failure(); bool isStrided = strideInfo.size() == 2; if (!strideInfo.empty() && !isStrided) { return parser.emitError(parser.getNameLoc(), "expected two stride related operands"); } if (parser.parseColonTypeList(types)) return failure(); if (types.size() != 3) return parser.emitError(parser.getNameLoc(), "fewer/more types expected"); if (parser.resolveOperand(srcMemRefInfo, types[0], result.operands) || parser.resolveOperands(srcIndexInfos, indexType, result.operands) || parser.resolveOperand(dstMemRefInfo, types[1], result.operands) || parser.resolveOperands(dstIndexInfos, indexType, result.operands) || // size should be an index. parser.resolveOperand(numElementsInfo, indexType, result.operands) || parser.resolveOperand(tagMemrefInfo, types[2], result.operands) || // tag indices should be index. 
parser.resolveOperands(tagIndexInfos, indexType, result.operands)) return failure(); auto memrefType0 = types[0].dyn_cast(); if (!memrefType0) return parser.emitError(parser.getNameLoc(), "expected source to be of memref type"); auto memrefType1 = types[1].dyn_cast(); if (!memrefType1) return parser.emitError(parser.getNameLoc(), "expected destination to be of memref type"); auto memrefType2 = types[2].dyn_cast(); if (!memrefType2) return parser.emitError(parser.getNameLoc(), "expected tag to be of memref type"); if (isStrided) { if (parser.resolveOperands(strideInfo, indexType, result.operands)) return failure(); } // Check that source/destination index list size matches associated rank. if (static_cast(srcIndexInfos.size()) != memrefType0.getRank() || static_cast(dstIndexInfos.size()) != memrefType1.getRank()) return parser.emitError(parser.getNameLoc(), "memref rank not equal to indices count"); if (static_cast(tagIndexInfos.size()) != memrefType2.getRank()) return parser.emitError(parser.getNameLoc(), "tag memref rank not equal to indices count"); return success(); } LogicalResult DmaStartOp::verify() { // DMAs from different memory spaces supported. 
if (getSrcMemorySpace() == getDstMemorySpace()) return emitOpError("DMA should be between different memory spaces"); if (getNumOperands() != getTagMemRefRank() + getSrcMemRefRank() + getDstMemRefRank() + 3 + 1 && getNumOperands() != getTagMemRefRank() + getSrcMemRefRank() + getDstMemRefRank() + 3 + 1 + 2) { return emitOpError("incorrect number of operands"); } return success(); } LogicalResult DmaStartOp::fold(ArrayRef cstOperands, SmallVectorImpl &results) { /// dma_start(memrefcast) -> dma_start return foldMemRefCast(*this); } // --------------------------------------------------------------------------- // DmaWaitOp // --------------------------------------------------------------------------- void DmaWaitOp::build(Builder *builder, OperationState &result, Value tagMemRef, ValueRange tagIndices, Value numElements) { result.addOperands(tagMemRef); result.addOperands(tagIndices); result.addOperands(numElements); } void DmaWaitOp::print(OpAsmPrinter &p) { p << "dma_wait " << getTagMemRef() << '[' << getTagIndices() << "], " << getNumElements(); p.printOptionalAttrDict(getAttrs()); p << " : " << getTagMemRef().getType(); } // Parse DmaWaitOp. // Eg: // dma_wait %tag[%index], %num_elements : memref<1 x i32, (d0) -> (d0), 4> // ParseResult DmaWaitOp::parse(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType tagMemrefInfo; SmallVector tagIndexInfos; Type type; auto indexType = parser.getBuilder().getIndexType(); OpAsmParser::OperandType numElementsInfo; // Parse tag memref, its indices, and dma size. 
if (parser.parseOperand(tagMemrefInfo) || parser.parseOperandList(tagIndexInfos, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseOperand(numElementsInfo) || parser.parseColonType(type) || parser.resolveOperand(tagMemrefInfo, type, result.operands) || parser.resolveOperands(tagIndexInfos, indexType, result.operands) || parser.resolveOperand(numElementsInfo, indexType, result.operands)) return failure(); auto memrefType = type.dyn_cast(); if (!memrefType) return parser.emitError(parser.getNameLoc(), "expected tag to be of memref type"); if (static_cast(tagIndexInfos.size()) != memrefType.getRank()) return parser.emitError(parser.getNameLoc(), "tag memref rank not equal to indices count"); return success(); } LogicalResult DmaWaitOp::fold(ArrayRef cstOperands, SmallVectorImpl &results) { /// dma_wait(memrefcast) -> dma_wait return foldMemRefCast(*this); } //===----------------------------------------------------------------------===// // ExtractElementOp //===----------------------------------------------------------------------===// static LogicalResult verify(ExtractElementOp op) { // Verify the # indices match if we have a ranked type. auto aggregateType = op.getAggregate().getType().cast(); if (aggregateType.hasRank() && aggregateType.getRank() != op.getNumOperands() - 1) return op.emitOpError("incorrect number of indices for extract_element"); return success(); } OpFoldResult ExtractElementOp::fold(ArrayRef operands) { assert(!operands.empty() && "extract_element takes at least one operand"); // The aggregate operand must be a known constant. Attribute aggregate = operands.front(); if (!aggregate) return {}; // If this is a splat elements attribute, simply return the value. All of the // elements of a splat attribute are the same. if (auto splatAggregate = aggregate.dyn_cast()) return splatAggregate.getSplatValue(); // Otherwise, collect the constant indices into the aggregate. 
SmallVector indices; for (Attribute indice : llvm::drop_begin(operands, 1)) { if (!indice || !indice.isa()) return {}; indices.push_back(indice.cast().getInt()); } // If this is an elements attribute, query the value at the given indices. auto elementsAttr = aggregate.dyn_cast(); if (elementsAttr && elementsAttr.isValidIndex(indices)) return elementsAttr.getValue(indices); return {}; } //===----------------------------------------------------------------------===// // IndexCastOp //===----------------------------------------------------------------------===// // Index cast is applicable from index to integer and backwards. bool IndexCastOp::areCastCompatible(Type a, Type b) { return (a.isIndex() && b.isSignlessInteger()) || (a.isSignlessInteger() && b.isIndex()); } OpFoldResult IndexCastOp::fold(ArrayRef cstOperands) { // Fold IndexCast(IndexCast(x)) -> x auto cast = dyn_cast_or_null(getOperand().getDefiningOp()); if (cast && cast.getOperand().getType() == getType()) return cast.getOperand(); // Fold IndexCast(constant) -> constant // A little hack because we go through int. Otherwise, the size // of the constant might need to change. 
if (auto value = cstOperands[0].dyn_cast_or_null()) return IntegerAttr::get(getType(), value.getInt()); return {}; } //===----------------------------------------------------------------------===// // LoadOp //===----------------------------------------------------------------------===// static LogicalResult verify(LoadOp op) { if (op.getNumOperands() != 1 + op.getMemRefType().getRank()) return op.emitOpError("incorrect number of indices for load"); return success(); } OpFoldResult LoadOp::fold(ArrayRef cstOperands) { /// load(memrefcast) -> load if (succeeded(foldMemRefCast(*this))) return getResult(); return OpFoldResult(); } //===----------------------------------------------------------------------===// // MemRefCastOp //===----------------------------------------------------------------------===// bool MemRefCastOp::areCastCompatible(Type a, Type b) { auto aT = a.dyn_cast(); auto bT = b.dyn_cast(); auto uaT = a.dyn_cast(); auto ubT = b.dyn_cast(); if (aT && bT) { if (aT.getElementType() != bT.getElementType()) return false; if (aT.getAffineMaps() != bT.getAffineMaps()) { int64_t aOffset, bOffset; SmallVector aStrides, bStrides; if (failed(getStridesAndOffset(aT, aStrides, aOffset)) || failed(getStridesAndOffset(bT, bStrides, bOffset)) || aStrides.size() != bStrides.size()) return false; // Strides along a dimension/offset are compatible if the value in the // source memref is static and the value in the target memref is the // same. They are also compatible if either one is dynamic (see // description of MemRefCastOp for details). 
auto checkCompatible = [](int64_t a, int64_t b) { return (a == MemRefType::getDynamicStrideOrOffset() || b == MemRefType::getDynamicStrideOrOffset() || a == b); }; if (!checkCompatible(aOffset, bOffset)) return false; for (auto aStride : enumerate(aStrides)) if (!checkCompatible(aStride.value(), bStrides[aStride.index()])) return false; } if (aT.getMemorySpace() != bT.getMemorySpace()) return false; // They must have the same rank, and any specified dimensions must match. if (aT.getRank() != bT.getRank()) return false; for (unsigned i = 0, e = aT.getRank(); i != e; ++i) { int64_t aDim = aT.getDimSize(i), bDim = bT.getDimSize(i); if (aDim != -1 && bDim != -1 && aDim != bDim) return false; } return true; } else { if (!aT && !uaT) return false; if (!bT && !ubT) return false; // Unranked to unranked casting is unsupported if (uaT && ubT) return false; auto aEltType = (aT) ? aT.getElementType() : uaT.getElementType(); auto bEltType = (bT) ? bT.getElementType() : ubT.getElementType(); if (aEltType != bEltType) return false; auto aMemSpace = (aT) ? aT.getMemorySpace() : uaT.getMemorySpace(); auto bMemSpace = (bT) ? 
bT.getMemorySpace() : ubT.getMemorySpace(); if (aMemSpace != bMemSpace) return false; return true; } return false; } OpFoldResult MemRefCastOp::fold(ArrayRef operands) { return impl::foldCastOp(*this); } //===----------------------------------------------------------------------===// // MulFOp //===----------------------------------------------------------------------===// OpFoldResult MulFOp::fold(ArrayRef operands) { return constFoldBinaryOp( operands, [](APFloat a, APFloat b) { return a * b; }); } //===----------------------------------------------------------------------===// // MulIOp //===----------------------------------------------------------------------===// OpFoldResult MulIOp::fold(ArrayRef operands) { /// muli(x, 0) -> 0 if (matchPattern(rhs(), m_Zero())) return rhs(); /// muli(x, 1) -> x if (matchPattern(rhs(), m_One())) return getOperand(0); // TODO: Handle the overflow case. return constFoldBinaryOp(operands, [](APInt a, APInt b) { return a * b; }); } //===----------------------------------------------------------------------===// // PrefetchOp //===----------------------------------------------------------------------===// static void print(OpAsmPrinter &p, PrefetchOp op) { p << PrefetchOp::getOperationName() << " " << op.memref() << '['; p.printOperands(op.indices()); p << ']' << ", " << (op.isWrite() ? "write" : "read"); p << ", locality<" << op.localityHint(); p << ">, " << (op.isDataCache() ? 
"data" : "instr"); p.printOptionalAttrDict( op.getAttrs(), /*elidedAttrs=*/{"localityHint", "isWrite", "isDataCache"}); p << " : " << op.getMemRefType(); } static ParseResult parsePrefetchOp(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType memrefInfo; SmallVector indexInfo; IntegerAttr localityHint; MemRefType type; StringRef readOrWrite, cacheType; auto indexTy = parser.getBuilder().getIndexType(); auto i32Type = parser.getBuilder().getIntegerType(32); if (parser.parseOperand(memrefInfo) || parser.parseOperandList(indexInfo, OpAsmParser::Delimiter::Square) || parser.parseComma() || parser.parseKeyword(&readOrWrite) || parser.parseComma() || parser.parseKeyword("locality") || parser.parseLess() || parser.parseAttribute(localityHint, i32Type, "localityHint", result.attributes) || parser.parseGreater() || parser.parseComma() || parser.parseKeyword(&cacheType) || parser.parseColonType(type) || parser.resolveOperand(memrefInfo, type, result.operands) || parser.resolveOperands(indexInfo, indexTy, result.operands)) return failure(); if (!readOrWrite.equals("read") && !readOrWrite.equals("write")) return parser.emitError(parser.getNameLoc(), "rw specifier has to be 'read' or 'write'"); result.addAttribute( PrefetchOp::getIsWriteAttrName(), parser.getBuilder().getBoolAttr(readOrWrite.equals("write"))); if (!cacheType.equals("data") && !cacheType.equals("instr")) return parser.emitError(parser.getNameLoc(), "cache type has to be 'data' or 'instr'"); result.addAttribute( PrefetchOp::getIsDataCacheAttrName(), parser.getBuilder().getBoolAttr(cacheType.equals("data"))); return success(); } static LogicalResult verify(PrefetchOp op) { if (op.getNumOperands() != 1 + op.getMemRefType().getRank()) return op.emitOpError("too few indices"); return success(); } LogicalResult PrefetchOp::fold(ArrayRef cstOperands, SmallVectorImpl &results) { // prefetch(memrefcast) -> prefetch return foldMemRefCast(*this); } 
//===----------------------------------------------------------------------===// // RankOp //===----------------------------------------------------------------------===// OpFoldResult RankOp::fold(ArrayRef operands) { // Constant fold rank when the rank of the tensor is known. auto type = getOperand().getType(); if (auto tensorType = type.dyn_cast()) return IntegerAttr::get(IndexType::get(getContext()), tensorType.getRank()); return IntegerAttr(); } //===----------------------------------------------------------------------===// // SignedRemIOp //===----------------------------------------------------------------------===// OpFoldResult SignedRemIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "remi_signed takes two operands"); auto rhs = operands.back().dyn_cast_or_null(); if (!rhs) return {}; auto rhsValue = rhs.getValue(); // x % 1 = 0 if (rhsValue.isOneValue()) return IntegerAttr::get(rhs.getType(), APInt(rhsValue.getBitWidth(), 0)); // Don't fold if it requires division by zero. if (rhsValue.isNullValue()) return {}; auto lhs = operands.front().dyn_cast_or_null(); if (!lhs) return {}; return IntegerAttr::get(lhs.getType(), lhs.getValue().srem(rhsValue)); } //===----------------------------------------------------------------------===// // UnsignedRemIOp //===----------------------------------------------------------------------===// OpFoldResult UnsignedRemIOp::fold(ArrayRef operands) { assert(operands.size() == 2 && "remi_unsigned takes two operands"); auto rhs = operands.back().dyn_cast_or_null(); if (!rhs) return {}; auto rhsValue = rhs.getValue(); // x % 1 = 0 if (rhsValue.isOneValue()) return IntegerAttr::get(rhs.getType(), APInt(rhsValue.getBitWidth(), 0)); // Don't fold if it requires division by zero. 
if (rhsValue.isNullValue()) return {}; auto lhs = operands.front().dyn_cast_or_null(); if (!lhs) return {}; return IntegerAttr::get(lhs.getType(), lhs.getValue().urem(rhsValue)); } //===----------------------------------------------------------------------===// // ReturnOp //===----------------------------------------------------------------------===// static LogicalResult verify(ReturnOp op) { auto function = cast(op.getParentOp()); // The operand number and types must match the function signature. const auto &results = function.getType().getResults(); if (op.getNumOperands() != results.size()) return op.emitOpError("has ") << op.getNumOperands() << " operands, but enclosing function returns " << results.size(); for (unsigned i = 0, e = results.size(); i != e; ++i) if (op.getOperand(i).getType() != results[i]) return op.emitError() << "type of return operand " << i << " (" << op.getOperand(i).getType() << ") doesn't match function result type (" << results[i] << ")"; return success(); } //===----------------------------------------------------------------------===// // SIToFPOp //===----------------------------------------------------------------------===// // sitofp is applicable from integer types to float types. 
bool SIToFPOp::areCastCompatible(Type a, Type b) { return a.isSignlessInteger() && b.isa(); } //===----------------------------------------------------------------------===// // SelectOp //===----------------------------------------------------------------------===// OpFoldResult SelectOp::fold(ArrayRef operands) { auto condition = getCondition(); // select true, %0, %1 => %0 if (matchPattern(condition, m_One())) return getTrueValue(); // select false, %0, %1 => %1 if (matchPattern(condition, m_Zero())) return getFalseValue(); return nullptr; } //===----------------------------------------------------------------------===// // SignExtendIOp //===----------------------------------------------------------------------===// static LogicalResult verify(SignExtendIOp op) { // Get the scalar type (which is either directly the type of the operand // or the vector's/tensor's element type. auto srcType = getElementTypeOrSelf(op.getOperand().getType()); auto dstType = getElementTypeOrSelf(op.getType()); // For now, index is forbidden for the source and the destination type. if (srcType.isa()) return op.emitError() << srcType << " is not a valid operand type"; if (dstType.isa()) return op.emitError() << dstType << " is not a valid result type"; if (srcType.cast().getWidth() >= dstType.cast().getWidth()) return op.emitError("result type ") << dstType << " must be wider than operand type " << srcType; return success(); } //===----------------------------------------------------------------------===// // SplatOp //===----------------------------------------------------------------------===// static LogicalResult verify(SplatOp op) { // TODO: we could replace this by a trait. if (op.getOperand().getType() != op.getType().cast().getElementType()) return op.emitError("operand should be of elemental type of result type"); return success(); } // Constant folding hook for SplatOp. 
OpFoldResult SplatOp::fold(ArrayRef operands) { assert(operands.size() == 1 && "splat takes one operand"); auto constOperand = operands.front(); if (!constOperand || (!constOperand.isa() && !constOperand.isa())) return {}; auto shapedType = getType().cast(); assert(shapedType.getElementType() == constOperand.getType() && "incorrect input attribute type for folding"); // SplatElementsAttr::get treats single value for second arg as being a splat. return SplatElementsAttr::get(shapedType, {constOperand}); } //===----------------------------------------------------------------------===// // StoreOp //===----------------------------------------------------------------------===// static LogicalResult verify(StoreOp op) { if (op.getNumOperands() != 2 + op.getMemRefType().getRank()) return op.emitOpError("store index operand count not equal to memref rank"); return success(); } LogicalResult StoreOp::fold(ArrayRef cstOperands, SmallVectorImpl &results) { /// store(memrefcast) -> store return foldMemRefCast(*this); } //===----------------------------------------------------------------------===// // SubFOp //===----------------------------------------------------------------------===// OpFoldResult SubFOp::fold(ArrayRef operands) { return constFoldBinaryOp( operands, [](APFloat a, APFloat b) { return a - b; }); } //===----------------------------------------------------------------------===// // SubIOp //===----------------------------------------------------------------------===// OpFoldResult SubIOp::fold(ArrayRef operands) { // subi(x,x) -> 0 if (getOperand(0) == getOperand(1)) return Builder(getContext()).getZeroAttr(getType()); return constFoldBinaryOp(operands, [](APInt a, APInt b) { return a - b; }); } //===----------------------------------------------------------------------===// // AndOp //===----------------------------------------------------------------------===// OpFoldResult AndOp::fold(ArrayRef operands) { /// and(x, 0) -> 0 if (matchPattern(rhs(), 
m_Zero())) return rhs(); /// and(x,x) -> x if (lhs() == rhs()) return rhs(); return constFoldBinaryOp(operands, [](APInt a, APInt b) { return a & b; }); } //===----------------------------------------------------------------------===// // OrOp //===----------------------------------------------------------------------===// OpFoldResult OrOp::fold(ArrayRef operands) { /// or(x, 0) -> x if (matchPattern(rhs(), m_Zero())) return lhs(); /// or(x,x) -> x if (lhs() == rhs()) return rhs(); return constFoldBinaryOp(operands, [](APInt a, APInt b) { return a | b; }); } //===----------------------------------------------------------------------===// // XOrOp //===----------------------------------------------------------------------===// OpFoldResult XOrOp::fold(ArrayRef operands) { /// xor(x, 0) -> x if (matchPattern(rhs(), m_Zero())) return lhs(); /// xor(x,x) -> 0 if (lhs() == rhs()) return Builder(getContext()).getZeroAttr(getType()); return constFoldBinaryOp(operands, [](APInt a, APInt b) { return a ^ b; }); } //===----------------------------------------------------------------------===// // TensorCastOp //===----------------------------------------------------------------------===// bool TensorCastOp::areCastCompatible(Type a, Type b) { auto aT = a.dyn_cast(); auto bT = b.dyn_cast(); if (!aT || !bT) return false; if (aT.getElementType() != bT.getElementType()) return false; return succeeded(verifyCompatibleShape(aT, bT)); } OpFoldResult TensorCastOp::fold(ArrayRef operands) { return impl::foldCastOp(*this); } //===----------------------------------------------------------------------===// // Helpers for Tensor[Load|Store]Op //===----------------------------------------------------------------------===// static Type getTensorTypeFromMemRefType(Type type) { if (auto memref = type.dyn_cast()) return RankedTensorType::get(memref.getShape(), memref.getElementType()); return NoneType::get(type.getContext()); } 
//===----------------------------------------------------------------------===// // TruncateIOp //===----------------------------------------------------------------------===// static LogicalResult verify(TruncateIOp op) { auto srcType = getElementTypeOrSelf(op.getOperand().getType()); auto dstType = getElementTypeOrSelf(op.getType()); if (srcType.isa()) return op.emitError() << srcType << " is not a valid operand type"; if (dstType.isa()) return op.emitError() << dstType << " is not a valid result type"; if (srcType.cast().getWidth() <= dstType.cast().getWidth()) return op.emitError("operand type ") << srcType << " must be wider than result type " << dstType; return success(); } //===----------------------------------------------------------------------===// // ViewOp //===----------------------------------------------------------------------===// static ParseResult parseViewOp(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType srcInfo; SmallVector offsetInfo; SmallVector sizesInfo; auto indexType = parser.getBuilder().getIndexType(); Type srcType, dstType; llvm::SMLoc offsetLoc; if (parser.parseOperand(srcInfo) || parser.getCurrentLocation(&offsetLoc) || parser.parseOperandList(offsetInfo, OpAsmParser::Delimiter::Square)) return failure(); if (offsetInfo.size() > 1) return parser.emitError(offsetLoc) << "expects 0 or 1 offset operand"; return failure( parser.parseOperandList(sizesInfo, OpAsmParser::Delimiter::Square) || parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(srcType) || parser.resolveOperand(srcInfo, srcType, result.operands) || parser.resolveOperands(offsetInfo, indexType, result.operands) || parser.resolveOperands(sizesInfo, indexType, result.operands) || parser.parseKeywordType("to", dstType) || parser.addTypeToList(dstType, result.types)); } static void print(OpAsmPrinter &p, ViewOp op) { p << op.getOperationName() << ' ' << op.getOperand(0) << '['; auto dynamicOffset = op.getDynamicOffset(); if 
(dynamicOffset != nullptr) p.printOperand(dynamicOffset); p << "][" << op.getDynamicSizes() << ']'; p.printOptionalAttrDict(op.getAttrs()); p << " : " << op.getOperand(0).getType() << " to " << op.getType(); } Value ViewOp::getDynamicOffset() { int64_t offset; SmallVector strides; auto result = succeeded(mlir::getStridesAndOffset(getType(), strides, offset)); assert(result); if (result && offset == MemRefType::getDynamicStrideOrOffset()) return getOperand(1); return nullptr; } static LogicalResult verifyDynamicStrides(MemRefType memrefType, ArrayRef strides) { ArrayRef shape = memrefType.getShape(); unsigned rank = memrefType.getRank(); assert(rank == strides.size()); bool dynamicStrides = false; for (int i = rank - 2; i >= 0; --i) { // If size at dim 'i + 1' is dynamic, set the 'dynamicStrides' flag. if (ShapedType::isDynamic(shape[i + 1])) dynamicStrides = true; // If stride at dim 'i' is not dynamic, return error. if (dynamicStrides && strides[i] != MemRefType::getDynamicStrideOrOffset()) return failure(); } return success(); } static LogicalResult verify(ViewOp op) { auto baseType = op.getOperand(0).getType().cast(); auto viewType = op.getResult().getType().cast(); // The base memref should have identity layout map (or none). if (baseType.getAffineMaps().size() > 1 || (baseType.getAffineMaps().size() == 1 && !baseType.getAffineMaps()[0].isIdentity())) return op.emitError("unsupported map for base memref type ") << baseType; // The base memref and the view memref should be in the same memory space. if (baseType.getMemorySpace() != viewType.getMemorySpace()) return op.emitError("different memory spaces specified for base memref " "type ") << baseType << " and view memref type " << viewType; // Verify that the result memref type has a strided layout map. 
int64_t offset; SmallVector strides; if (failed(getStridesAndOffset(viewType, strides, offset))) return op.emitError("result type ") << viewType << " is not strided"; // Verify that we have the correct number of operands for the result type. unsigned memrefOperandCount = 1; unsigned numDynamicDims = viewType.getNumDynamicDims(); unsigned dynamicOffsetCount = offset == MemRefType::getDynamicStrideOrOffset() ? 1 : 0; if (op.getNumOperands() != memrefOperandCount + numDynamicDims + dynamicOffsetCount) return op.emitError("incorrect number of operands for type ") << viewType; // Verify dynamic strides symbols were added to correct dimensions based // on dynamic sizes. if (failed(verifyDynamicStrides(viewType, strides))) return op.emitError("incorrect dynamic strides in view memref type ") << viewType; return success(); } namespace { struct ViewOpShapeFolder : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(ViewOp viewOp, PatternRewriter &rewriter) const override { // Return if none of the operands are constants. if (llvm::none_of(viewOp.getOperands(), [](Value operand) { return matchPattern(operand, m_ConstantIndex()); })) return matchFailure(); // Get result memref type. auto memrefType = viewOp.getType(); if (memrefType.getAffineMaps().size() > 1) return matchFailure(); auto map = memrefType.getAffineMaps().empty() ? AffineMap::getMultiDimIdentityMap(memrefType.getRank(), rewriter.getContext()) : memrefType.getAffineMaps()[0]; // Get offset from old memref view type 'memRefType'. int64_t oldOffset; SmallVector oldStrides; if (failed(getStridesAndOffset(memrefType, oldStrides, oldOffset))) return matchFailure(); SmallVector newOperands; // Fold dynamic offset operand if it is produced by a constant. 
auto dynamicOffset = viewOp.getDynamicOffset(); int64_t newOffset = oldOffset; unsigned dynamicOffsetOperandCount = 0; if (dynamicOffset != nullptr) { auto *defOp = dynamicOffset.getDefiningOp(); if (auto constantIndexOp = dyn_cast_or_null(defOp)) { // Dynamic offset will be folded into the map. newOffset = constantIndexOp.getValue(); } else { // Unable to fold dynamic offset. Add it to 'newOperands' list. newOperands.push_back(dynamicOffset); dynamicOffsetOperandCount = 1; } } // Fold any dynamic dim operands which are produced by a constant. SmallVector newShapeConstants; newShapeConstants.reserve(memrefType.getRank()); unsigned dynamicDimPos = viewOp.getDynamicSizesOperandStart(); unsigned rank = memrefType.getRank(); for (unsigned dim = 0, e = rank; dim < e; ++dim) { int64_t dimSize = memrefType.getDimSize(dim); // If this is already static dimension, keep it. if (!ShapedType::isDynamic(dimSize)) { newShapeConstants.push_back(dimSize); continue; } auto *defOp = viewOp.getOperand(dynamicDimPos).getDefiningOp(); if (auto constantIndexOp = dyn_cast_or_null(defOp)) { // Dynamic shape dimension will be folded. newShapeConstants.push_back(constantIndexOp.getValue()); } else { // Dynamic shape dimension not folded; copy operand from old memref. newShapeConstants.push_back(dimSize); newOperands.push_back(viewOp.getOperand(dynamicDimPos)); } dynamicDimPos++; } // Compute new strides based on 'newShapeConstants'. SmallVector newStrides(rank); newStrides[rank - 1] = 1; bool dynamicStrides = false; for (int i = rank - 2; i >= 0; --i) { if (ShapedType::isDynamic(newShapeConstants[i + 1])) dynamicStrides = true; if (dynamicStrides) newStrides[i] = MemRefType::getDynamicStrideOrOffset(); else newStrides[i] = newShapeConstants[i + 1] * newStrides[i + 1]; } // Regenerate strided layout map with 'newStrides' and 'newOffset'. map = makeStridedLinearLayoutMap(newStrides, newOffset, rewriter.getContext()); // Create new memref type with constant folded dims and/or offset/strides. 
MemRefType newMemRefType = MemRefType::Builder(memrefType) .setShape(newShapeConstants) .setAffineMaps({map}); (void)dynamicOffsetOperandCount; // unused in opt mode assert(static_cast(newOperands.size()) == dynamicOffsetOperandCount + newMemRefType.getNumDynamicDims()); // Create new ViewOp. auto newViewOp = rewriter.create(viewOp.getLoc(), newMemRefType, viewOp.getOperand(0), newOperands); // Insert a cast so we have the same type as the old memref type. rewriter.replaceOpWithNewOp(viewOp, newViewOp, viewOp.getType()); return matchSuccess(); } }; struct ViewOpMemrefCastFolder : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(ViewOp viewOp, PatternRewriter &rewriter) const override { Value memrefOperand = viewOp.getOperand(0); MemRefCastOp memrefCastOp = dyn_cast_or_null(memrefOperand.getDefiningOp()); if (!memrefCastOp) return matchFailure(); Value allocOperand = memrefCastOp.getOperand(); AllocOp allocOp = dyn_cast_or_null(allocOperand.getDefiningOp()); if (!allocOp) return matchFailure(); rewriter.replaceOpWithNewOp(viewOp, viewOp.getType(), allocOperand, viewOp.operands()); return matchSuccess(); } }; } // end anonymous namespace void ViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { results.insert(context); } //===----------------------------------------------------------------------===// // SubViewOp //===----------------------------------------------------------------------===// // Returns a MemRefType with dynamic sizes and offset and the same stride as the // `memRefType` passed as argument. // TODO(andydavis,ntv) Evolve to a more powerful inference that can also keep // sizes and offset static. 
static Type inferSubViewResultType(MemRefType memRefType) { auto rank = memRefType.getRank(); int64_t offset; SmallVector strides; auto res = getStridesAndOffset(memRefType, strides, offset); assert(succeeded(res) && "SubViewOp expected strided memref type"); (void)res; // Assume sizes and offset are fully dynamic for now until canonicalization // occurs on the ranges. Typed strides don't change though. offset = MemRefType::getDynamicStrideOrOffset(); // Overwrite strides because verifier will not pass. // TODO(b/144419106): don't force degrade the strides to fully dynamic. for (auto &stride : strides) stride = MemRefType::getDynamicStrideOrOffset(); auto stridedLayout = makeStridedLinearLayoutMap(strides, offset, memRefType.getContext()); SmallVector sizes(rank, ShapedType::kDynamicSize); return MemRefType::Builder(memRefType) .setShape(sizes) .setAffineMaps(stridedLayout); } void mlir::SubViewOp::build(Builder *b, OperationState &result, Value source, ValueRange offsets, ValueRange sizes, ValueRange strides, Type resultType, ArrayRef attrs) { if (!resultType) resultType = inferSubViewResultType(source.getType().cast()); auto segmentAttr = b->getI32VectorAttr( {1, static_cast(offsets.size()), static_cast(sizes.size()), static_cast(strides.size())}); build(b, result, resultType, source, offsets, sizes, strides, segmentAttr); result.addAttributes(attrs); } void mlir::SubViewOp::build(Builder *b, OperationState &result, Type resultType, Value source) { build(b, result, source, /*offsets=*/{}, /*sizes=*/{}, /*strides=*/{}, resultType); } static ParseResult parseSubViewOp(OpAsmParser &parser, OperationState &result) { OpAsmParser::OperandType srcInfo; SmallVector offsetsInfo; SmallVector sizesInfo; SmallVector stridesInfo; auto indexType = parser.getBuilder().getIndexType(); Type srcType, dstType; if (parser.parseOperand(srcInfo) || parser.parseOperandList(offsetsInfo, OpAsmParser::Delimiter::Square) || parser.parseOperandList(sizesInfo, OpAsmParser::Delimiter::Square) 
|| parser.parseOperandList(stridesInfo, OpAsmParser::Delimiter::Square)) { return failure(); } auto builder = parser.getBuilder(); result.addAttribute( SubViewOp::getOperandSegmentSizeAttr(), builder.getI32VectorAttr({1, static_cast(offsetsInfo.size()), static_cast(sizesInfo.size()), static_cast(stridesInfo.size())})); return failure( parser.parseOptionalAttrDict(result.attributes) || parser.parseColonType(srcType) || parser.resolveOperand(srcInfo, srcType, result.operands) || parser.resolveOperands(offsetsInfo, indexType, result.operands) || parser.resolveOperands(sizesInfo, indexType, result.operands) || parser.resolveOperands(stridesInfo, indexType, result.operands) || parser.parseKeywordType("to", dstType) || parser.addTypeToList(dstType, result.types)); } static void print(OpAsmPrinter &p, SubViewOp op) { p << op.getOperationName() << ' ' << op.getOperand(0) << '[' << op.offsets() << "][" << op.sizes() << "][" << op.strides() << ']'; std::array elidedAttrs = { SubViewOp::getOperandSegmentSizeAttr()}; p.printOptionalAttrDict(op.getAttrs(), elidedAttrs); p << " : " << op.getOperand(0).getType() << " to " << op.getType(); } static LogicalResult verify(SubViewOp op) { auto baseType = op.getBaseMemRefType().cast(); auto subViewType = op.getType(); // The rank of the base and result subview must match. if (baseType.getRank() != subViewType.getRank()) { return op.emitError( "expected rank of result type to match rank of base type "); } // The base memref and the view memref should be in the same memory space. if (baseType.getMemorySpace() != subViewType.getMemorySpace()) return op.emitError("different memory spaces specified for base memref " "type ") << baseType << " and subview memref type " << subViewType; // Verify that the base memref type has a strided layout map. 
int64_t baseOffset; SmallVector baseStrides; if (failed(getStridesAndOffset(baseType, baseStrides, baseOffset))) return op.emitError("base type ") << subViewType << " is not strided"; // Verify that the result memref type has a strided layout map. int64_t subViewOffset; SmallVector subViewStrides; if (failed(getStridesAndOffset(subViewType, subViewStrides, subViewOffset))) return op.emitError("result type ") << subViewType << " is not strided"; // Num offsets should either be zero or rank of memref. if (op.getNumOffsets() != 0 && op.getNumOffsets() != subViewType.getRank()) { return op.emitError("expected number of dynamic offsets specified to match " "the rank of the result type ") << subViewType; } // Num sizes should either be zero or rank of memref. if (op.getNumSizes() != 0 && op.getNumSizes() != subViewType.getRank()) { return op.emitError("expected number of dynamic sizes specified to match " "the rank of the result type ") << subViewType; } // Num strides should either be zero or rank of memref. if (op.getNumStrides() != 0 && op.getNumStrides() != subViewType.getRank()) { return op.emitError("expected number of dynamic strides specified to match " "the rank of the result type ") << subViewType; } // Verify that if the shape of the subview type is static, then sizes are not // dynamic values, and vice versa. if ((subViewType.hasStaticShape() && op.getNumSizes() != 0) || (op.getNumSizes() == 0 && !subViewType.hasStaticShape())) { return op.emitError("invalid to specify dynamic sizes when subview result " "type is statically shaped and viceversa"); } // Verify that if dynamic sizes are specified, then the result memref type // have full dynamic dimensions. if (op.getNumSizes() > 0) { if (llvm::any_of(subViewType.getShape(), [](int64_t dim) { return dim != ShapedType::kDynamicSize; })) { // TODO: This is based on the assumption that number of size arguments are // either 0, or the rank of the result type. 
It is possible to have more // fine-grained verification where only particular dimensions are // dynamic. That probably needs further changes to the shape op // specification. return op.emitError("expected shape of result type to be fully dynamic " "when sizes are specified"); } } // Verify that if dynamic offsets are specified or base memref has dynamic // offset or base memref has dynamic strides, then the subview offset is // dynamic. if ((op.getNumOffsets() > 0 || baseOffset == MemRefType::getDynamicStrideOrOffset() || llvm::is_contained(baseStrides, MemRefType::getDynamicStrideOrOffset())) && subViewOffset != MemRefType::getDynamicStrideOrOffset()) { return op.emitError( "expected result memref layout map to have dynamic offset"); } // For now, verify that if dynamic strides are specified, then all the result // memref type have dynamic strides. if (op.getNumStrides() > 0) { if (llvm::any_of(subViewStrides, [](int64_t stride) { return stride != MemRefType::getDynamicStrideOrOffset(); })) { return op.emitError("expected result type to have dynamic strides"); } } // If any of the base memref has dynamic stride, then the corresponding // stride of the subview must also have dynamic stride. 
assert(baseStrides.size() == subViewStrides.size()); for (auto stride : enumerate(baseStrides)) { if (stride.value() == MemRefType::getDynamicStrideOrOffset() && subViewStrides[stride.index()] != MemRefType::getDynamicStrideOrOffset()) { return op.emitError( "expected result type to have dynamic stride along a dimension if " "the base memref type has dynamic stride along that dimension"); } } return success(); } raw_ostream &mlir::operator<<(raw_ostream &os, SubViewOp::Range &range) { return os << "range " << range.offset << ":" << range.size << ":" << range.stride; } SmallVector SubViewOp::getRanges() { SmallVector res; unsigned rank = getType().getRank(); res.reserve(rank); for (unsigned i = 0; i < rank; ++i) res.emplace_back(Range{*(offsets().begin() + i), *(sizes().begin() + i), *(strides().begin() + i)}); return res; } LogicalResult SubViewOp::getStaticStrides(SmallVectorImpl &staticStrides) { // If the strides are dynamic return failure. if (getNumStrides()) return failure(); // When static, the stride operands can be retrieved by taking the strides of // the result of the subview op, and dividing the strides of the base memref. 
int64_t resultOffset, baseOffset; SmallVector resultStrides, baseStrides; if (failed( getStridesAndOffset(getBaseMemRefType(), baseStrides, baseOffset)) || llvm::is_contained(baseStrides, MemRefType::getDynamicStrideOrOffset()) || failed(getStridesAndOffset(getType(), resultStrides, resultOffset))) return failure(); assert(static_cast(resultStrides.size()) == getType().getRank() && baseStrides.size() == resultStrides.size() && "base and result memrefs must have the same rank"); assert(!llvm::is_contained(resultStrides, MemRefType::getDynamicStrideOrOffset()) && "strides of subview op must be static, when there are no dynamic " "strides specified"); staticStrides.resize(getType().getRank()); for (auto resultStride : enumerate(resultStrides)) { auto baseStride = baseStrides[resultStride.index()]; // The result stride is expected to be a multiple of the base stride. Abort // if that is not the case. if (resultStride.value() < baseStride || resultStride.value() % baseStride != 0) return failure(); staticStrides[resultStride.index()] = resultStride.value() / baseStride; } return success(); } //===----------------------------------------------------------------------===// // AssumeAlignmentOp //===----------------------------------------------------------------------===// static LogicalResult verify(AssumeAlignmentOp op) { unsigned alignment = op.alignment().getZExtValue(); if (!llvm::isPowerOf2_32(alignment)) return op.emitOpError("alignment must be power of 2"); return success(); } namespace { /// Pattern to rewrite a subview op with constant size arguments. class SubViewOpShapeFolder final : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(SubViewOp subViewOp, PatternRewriter &rewriter) const override { MemRefType subViewType = subViewOp.getType(); // Follow all or nothing approach for shapes for now. If all the operands // for sizes are constants then fold it into the type of the result memref. 
if (subViewType.hasStaticShape() || llvm::any_of(subViewOp.sizes(), [](Value operand) { return !matchPattern(operand, m_ConstantIndex()); })) { return matchFailure(); } SmallVector staticShape(subViewOp.getNumSizes()); for (auto size : llvm::enumerate(subViewOp.sizes())) { auto defOp = size.value().getDefiningOp(); assert(defOp); staticShape[size.index()] = cast(defOp).getValue(); } MemRefType newMemRefType = MemRefType::Builder(subViewType).setShape(staticShape); auto newSubViewOp = rewriter.create( subViewOp.getLoc(), subViewOp.source(), subViewOp.offsets(), ArrayRef(), subViewOp.strides(), newMemRefType); // Insert a memref_cast for compatibility of the uses of the op. rewriter.replaceOpWithNewOp(subViewOp, newSubViewOp, subViewOp.getType()); return matchSuccess(); } }; // Pattern to rewrite a subview op with constant stride arguments. class SubViewOpStrideFolder final : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(SubViewOp subViewOp, PatternRewriter &rewriter) const override { if (subViewOp.getNumStrides() == 0) { return matchFailure(); } // Follow all or nothing approach for strides for now. If all the operands // for strides are constants then fold it into the strides of the result // memref. 
int64_t baseOffset, resultOffset; SmallVector baseStrides, resultStrides; MemRefType subViewType = subViewOp.getType(); if (failed(getStridesAndOffset(subViewOp.getBaseMemRefType(), baseStrides, baseOffset)) || failed(getStridesAndOffset(subViewType, resultStrides, resultOffset)) || llvm::is_contained(baseStrides, MemRefType::getDynamicStrideOrOffset()) || llvm::any_of(subViewOp.strides(), [](Value stride) { return !matchPattern(stride, m_ConstantIndex()); })) { return matchFailure(); } SmallVector staticStrides(subViewOp.getNumStrides()); for (auto stride : llvm::enumerate(subViewOp.strides())) { auto defOp = stride.value().getDefiningOp(); assert(defOp); assert(baseStrides[stride.index()] > 0); staticStrides[stride.index()] = cast(defOp).getValue() * baseStrides[stride.index()]; } AffineMap layoutMap = makeStridedLinearLayoutMap( staticStrides, resultOffset, rewriter.getContext()); MemRefType newMemRefType = MemRefType::Builder(subViewType).setAffineMaps(layoutMap); auto newSubViewOp = rewriter.create( subViewOp.getLoc(), subViewOp.source(), subViewOp.offsets(), subViewOp.sizes(), ArrayRef(), newMemRefType); // Insert a memref_cast for compatibility of the uses of the op. rewriter.replaceOpWithNewOp(subViewOp, newSubViewOp, subViewOp.getType()); return matchSuccess(); } }; // Pattern to rewrite a subview op with constant offset arguments. class SubViewOpOffsetFolder final : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; PatternMatchResult matchAndRewrite(SubViewOp subViewOp, PatternRewriter &rewriter) const override { if (subViewOp.getNumOffsets() == 0) { return matchFailure(); } // Follow all or nothing approach for offsets for now. If all the operands // for offsets are constants then fold it into the offset of the result // memref. 
int64_t baseOffset, resultOffset; SmallVector baseStrides, resultStrides; MemRefType subViewType = subViewOp.getType(); if (failed(getStridesAndOffset(subViewOp.getBaseMemRefType(), baseStrides, baseOffset)) || failed(getStridesAndOffset(subViewType, resultStrides, resultOffset)) || llvm::is_contained(baseStrides, MemRefType::getDynamicStrideOrOffset()) || baseOffset == MemRefType::getDynamicStrideOrOffset() || llvm::any_of(subViewOp.offsets(), [](Value stride) { return !matchPattern(stride, m_ConstantIndex()); })) { return matchFailure(); } auto staticOffset = baseOffset; for (auto offset : llvm::enumerate(subViewOp.offsets())) { auto defOp = offset.value().getDefiningOp(); assert(defOp); assert(baseStrides[offset.index()] > 0); staticOffset += cast(defOp).getValue() * baseStrides[offset.index()]; } AffineMap layoutMap = makeStridedLinearLayoutMap( resultStrides, staticOffset, rewriter.getContext()); MemRefType newMemRefType = MemRefType::Builder(subViewType).setAffineMaps(layoutMap); auto newSubViewOp = rewriter.create( subViewOp.getLoc(), subViewOp.source(), ArrayRef(), subViewOp.sizes(), subViewOp.strides(), newMemRefType); // Insert a memref_cast for compatibility of the uses of the op. 
rewriter.replaceOpWithNewOp(subViewOp, newSubViewOp, subViewOp.getType()); return matchSuccess(); } }; } // end anonymous namespace void SubViewOp::getCanonicalizationPatterns(OwningRewritePatternList &results, MLIRContext *context) { results.insert(context); } //===----------------------------------------------------------------------===// // ZeroExtendIOp //===----------------------------------------------------------------------===// static LogicalResult verify(ZeroExtendIOp op) { auto srcType = getElementTypeOrSelf(op.getOperand().getType()); auto dstType = getElementTypeOrSelf(op.getType()); if (srcType.isa()) return op.emitError() << srcType << " is not a valid operand type"; if (dstType.isa()) return op.emitError() << dstType << " is not a valid result type"; if (srcType.cast().getWidth() >= dstType.cast().getWidth()) return op.emitError("result type ") << dstType << " must be wider than operand type " << srcType; return success(); } //===----------------------------------------------------------------------===// // FPExtOp //===----------------------------------------------------------------------===// bool FPExtOp::areCastCompatible(Type a, Type b) { if (auto fa = a.dyn_cast()) if (auto fb = b.dyn_cast()) return fa.getWidth() < fb.getWidth(); return false; } //===----------------------------------------------------------------------===// // FPTruncOp //===----------------------------------------------------------------------===// bool FPTruncOp::areCastCompatible(Type a, Type b) { if (auto fa = a.dyn_cast()) if (auto fb = b.dyn_cast()) return fa.getWidth() > fb.getWidth(); return false; } +//===----------------------------------------------------------------------===// +// AtomicRMWOp +//===----------------------------------------------------------------------===// + +static LogicalResult verify(AtomicRMWOp op) { + if (op.getMemRefType().getRank() != op.getNumOperands() - 2) + return op.emitOpError( + "expects the number of subscripts to be equal to memref 
rank"); + switch (op.kind()) { + case AtomicRMWKind::addf: + case AtomicRMWKind::maxf: + case AtomicRMWKind::minf: + case AtomicRMWKind::mulf: + if (!op.value().getType().isa()) + return op.emitOpError() + << "with kind '" << stringifyAtomicRMWKind(op.kind()) + << "' expects a floating-point type"; + break; + case AtomicRMWKind::addi: + case AtomicRMWKind::maxs: + case AtomicRMWKind::maxu: + case AtomicRMWKind::mins: + case AtomicRMWKind::minu: + case AtomicRMWKind::muli: + if (!op.value().getType().isa()) + return op.emitOpError() + << "with kind '" << stringifyAtomicRMWKind(op.kind()) + << "' expects an integer type"; + break; + default: + break; + } + return success(); +} + //===----------------------------------------------------------------------===// // TableGen'd op method definitions //===----------------------------------------------------------------------===// #define GET_OP_CLASSES #include "mlir/Dialect/StandardOps/IR/Ops.cpp.inc" diff --git a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir index 8839514937e0..27c249372b15 100644 --- a/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/StandardToLLVM/convert-to-llvmir.mlir @@ -1,872 +1,912 @@ // RUN: mlir-opt -convert-std-to-llvm %s -split-input-file | FileCheck %s // CHECK-LABEL: func @empty() { // CHECK-NEXT: llvm.return // CHECK-NEXT: } func @empty() { ^bb0: return } // CHECK-LABEL: func @body(!llvm.i64) func @body(index) // CHECK-LABEL: func @simple_loop() { func @simple_loop() { ^bb0: // CHECK-NEXT: llvm.br ^bb1 br ^bb1 // CHECK-NEXT: ^bb1: // pred: ^bb0 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) ^bb1: // pred: ^bb0 %c1 = constant 1 : index %c42 = constant 42 : index br ^bb2(%c1 : index) // CHECK: ^bb2({{.*}}: !llvm.i64): // 2 preds: ^bb1, ^bb3 // 
CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb3, ^bb4 ^bb2(%0: index): // 2 preds: ^bb1, ^bb3 %1 = cmpi "slt", %0, %c42 : index cond_br %1, ^bb3, ^bb4 // CHECK: ^bb3: // pred: ^bb2 // CHECK-NEXT: llvm.call @body({{.*}}) : (!llvm.i64) -> () // CHECK-NEXT: {{.*}} = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) ^bb3: // pred: ^bb2 call @body(%0) : (index) -> () %c1_0 = constant 1 : index %2 = addi %0, %c1_0 : index br ^bb2(%2 : index) // CHECK: ^bb4: // pred: ^bb2 // CHECK-NEXT: llvm.return ^bb4: // pred: ^bb2 return } // CHECK-LABEL: func @simple_caller() { // CHECK-NEXT: llvm.call @simple_loop() : () -> () // CHECK-NEXT: llvm.return // CHECK-NEXT: } func @simple_caller() { ^bb0: call @simple_loop() : () -> () return } // Check that function call attributes persist during conversion. // CHECK-LABEL: @call_with_attributes func @call_with_attributes() { // CHECK: llvm.call @simple_loop() {baz = [1, 2, 3, 4], foo = "bar"} : () -> () call @simple_loop() {foo="bar", baz=[1,2,3,4]} : () -> () return } // CHECK-LABEL: func @ml_caller() { // CHECK-NEXT: llvm.call @simple_loop() : () -> () // CHECK-NEXT: llvm.call @more_imperfectly_nested_loops() : () -> () // CHECK-NEXT: llvm.return // CHECK-NEXT: } func @ml_caller() { ^bb0: call @simple_loop() : () -> () call @more_imperfectly_nested_loops() : () -> () return } // CHECK-LABEL: func @body_args(!llvm.i64) -> !llvm.i64 func @body_args(index) -> index // CHECK-LABEL: func @other(!llvm.i64, !llvm.i32) -> !llvm.i32 func @other(index, i32) -> i32 // CHECK-LABEL: func @func_args(%arg0: !llvm.i32, %arg1: !llvm.i32) -> !llvm.i32 { // CHECK-NEXT: {{.*}} = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK-NEXT: llvm.br ^bb1 func @func_args(i32, i32) -> i32 { ^bb0(%arg0: i32, %arg1: i32): %c0_i32 = constant 0 : i32 br ^bb1 // CHECK-NEXT: ^bb1: // pred: ^bb0 // CHECK-NEXT: {{.*}} 
= llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) ^bb1: // pred: ^bb0 %c0 = constant 0 : index %c42 = constant 42 : index br ^bb2(%c0 : index) // CHECK-NEXT: ^bb2({{.*}}: !llvm.i64): // 2 preds: ^bb1, ^bb3 // CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb3, ^bb4 ^bb2(%0: index): // 2 preds: ^bb1, ^bb3 %1 = cmpi "slt", %0, %c42 : index cond_br %1, ^bb3, ^bb4 // CHECK-NEXT: ^bb3: // pred: ^bb2 // CHECK-NEXT: {{.*}} = llvm.call @body_args({{.*}}) : (!llvm.i64) -> !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.call @other({{.*}}, %arg0) : (!llvm.i64, !llvm.i32) -> !llvm.i32 // CHECK-NEXT: {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (!llvm.i64, !llvm.i32) -> !llvm.i32 // CHECK-NEXT: {{.*}} = llvm.call @other({{.*}}, %arg1) : (!llvm.i64, !llvm.i32) -> !llvm.i32 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) ^bb3: // pred: ^bb2 %2 = call @body_args(%0) : (index) -> index %3 = call @other(%2, %arg0) : (index, i32) -> i32 %4 = call @other(%2, %3) : (index, i32) -> i32 %5 = call @other(%2, %arg1) : (index, i32) -> i32 %c1 = constant 1 : index %6 = addi %0, %c1 : index br ^bb2(%6 : index) // CHECK-NEXT: ^bb4: // pred: ^bb2 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.call @other({{.*}}, {{.*}}) : (!llvm.i64, !llvm.i32) -> !llvm.i32 // CHECK-NEXT: llvm.return {{.*}} : !llvm.i32 ^bb4: // pred: ^bb2 %c0_0 = constant 0 : index %7 = call @other(%c0_0, %c0_i32) : (index, i32) -> i32 return %7 : i32 } // CHECK-LABEL: func @pre(!llvm.i64) func @pre(index) // CHECK-LABEL: func @body2(!llvm.i64, !llvm.i64) func @body2(index, index) // CHECK-LABEL: func @post(!llvm.i64) func @post(index) // CHECK-LABEL: func @imperfectly_nested_loops() { // CHECK-NEXT: 
llvm.br ^bb1 func @imperfectly_nested_loops() { ^bb0: br ^bb1 // CHECK-NEXT: ^bb1: // pred: ^bb0 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) ^bb1: // pred: ^bb0 %c0 = constant 0 : index %c42 = constant 42 : index br ^bb2(%c0 : index) // CHECK-NEXT: ^bb2({{.*}}: !llvm.i64): // 2 preds: ^bb1, ^bb7 // CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb3, ^bb8 ^bb2(%0: index): // 2 preds: ^bb1, ^bb7 %1 = cmpi "slt", %0, %c42 : index cond_br %1, ^bb3, ^bb8 // CHECK-NEXT: ^bb3: // CHECK-NEXT: llvm.call @pre({{.*}}) : (!llvm.i64) -> () // CHECK-NEXT: llvm.br ^bb4 ^bb3: // pred: ^bb2 call @pre(%0) : (index) -> () br ^bb4 // CHECK-NEXT: ^bb4: // pred: ^bb3 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(7 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(56 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb5({{.*}} : !llvm.i64) ^bb4: // pred: ^bb3 %c7 = constant 7 : index %c56 = constant 56 : index br ^bb5(%c7 : index) // CHECK-NEXT: ^bb5({{.*}}: !llvm.i64): // 2 preds: ^bb4, ^bb6 // CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb6, ^bb7 ^bb5(%2: index): // 2 preds: ^bb4, ^bb6 %3 = cmpi "slt", %2, %c56 : index cond_br %3, ^bb6, ^bb7 // CHECK-NEXT: ^bb6: // pred: ^bb5 // CHECK-NEXT: llvm.call @body2({{.*}}, {{.*}}) : (!llvm.i64, !llvm.i64) -> () // CHECK-NEXT: {{.*}} = llvm.mlir.constant(2 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb5({{.*}} : !llvm.i64) ^bb6: // pred: ^bb5 call @body2(%0, %2) : (index, index) -> () %c2 = constant 2 : index %4 = addi %2, %c2 : index br ^bb5(%4 : index) // CHECK-NEXT: ^bb7: // pred: ^bb5 // CHECK-NEXT: llvm.call @post({{.*}}) : (!llvm.i64) -> () // CHECK-NEXT: {{.*}} = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = 
llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) ^bb7: // pred: ^bb5 call @post(%0) : (index) -> () %c1 = constant 1 : index %5 = addi %0, %c1 : index br ^bb2(%5 : index) // CHECK-NEXT: ^bb8: // pred: ^bb2 // CHECK-NEXT: llvm.return ^bb8: // pred: ^bb2 return } // CHECK-LABEL: func @mid(!llvm.i64) func @mid(index) // CHECK-LABEL: func @body3(!llvm.i64, !llvm.i64) func @body3(index, index) // A complete function transformation check. // CHECK-LABEL: func @more_imperfectly_nested_loops() { // CHECK-NEXT: llvm.br ^bb1 // CHECK-NEXT:^bb1: // pred: ^bb0 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(42 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) // CHECK-NEXT:^bb2({{.*}}: !llvm.i64): // 2 preds: ^bb1, ^bb11 // CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb3, ^bb12 // CHECK-NEXT:^bb3: // pred: ^bb2 // CHECK-NEXT: llvm.call @pre({{.*}}) : (!llvm.i64) -> () // CHECK-NEXT: llvm.br ^bb4 // CHECK-NEXT:^bb4: // pred: ^bb3 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(7 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(56 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb5({{.*}} : !llvm.i64) // CHECK-NEXT:^bb5({{.*}}: !llvm.i64): // 2 preds: ^bb4, ^bb6 // CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb6, ^bb7 // CHECK-NEXT:^bb6: // pred: ^bb5 // CHECK-NEXT: llvm.call @body2({{.*}}, {{.*}}) : (!llvm.i64, !llvm.i64) -> () // CHECK-NEXT: {{.*}} = llvm.mlir.constant(2 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb5({{.*}} : !llvm.i64) // CHECK-NEXT:^bb7: // pred: ^bb5 // CHECK-NEXT: llvm.call @mid({{.*}}) : (!llvm.i64) -> () // CHECK-NEXT: llvm.br ^bb8 // CHECK-NEXT:^bb8: // pred: ^bb7 // CHECK-NEXT: {{.*}} = llvm.mlir.constant(18 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = 
llvm.mlir.constant(37 : index) : !llvm.i64 // CHECK-NEXT: llvm.br ^bb9({{.*}} : !llvm.i64) // CHECK-NEXT:^bb9({{.*}}: !llvm.i64): // 2 preds: ^bb8, ^bb10 // CHECK-NEXT: {{.*}} = llvm.icmp "slt" {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.cond_br {{.*}}, ^bb10, ^bb11 // CHECK-NEXT:^bb10: // pred: ^bb9 // CHECK-NEXT: llvm.call @body3({{.*}}, {{.*}}) : (!llvm.i64, !llvm.i64) -> () // CHECK-NEXT: {{.*}} = llvm.mlir.constant(3 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb9({{.*}} : !llvm.i64) // CHECK-NEXT:^bb11: // pred: ^bb9 // CHECK-NEXT: llvm.call @post({{.*}}) : (!llvm.i64) -> () // CHECK-NEXT: {{.*}} = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 // CHECK-NEXT: llvm.br ^bb2({{.*}} : !llvm.i64) // CHECK-NEXT:^bb12: // pred: ^bb2 // CHECK-NEXT: llvm.return // CHECK-NEXT: } func @more_imperfectly_nested_loops() { ^bb0: br ^bb1 ^bb1: // pred: ^bb0 %c0 = constant 0 : index %c42 = constant 42 : index br ^bb2(%c0 : index) ^bb2(%0: index): // 2 preds: ^bb1, ^bb11 %1 = cmpi "slt", %0, %c42 : index cond_br %1, ^bb3, ^bb12 ^bb3: // pred: ^bb2 call @pre(%0) : (index) -> () br ^bb4 ^bb4: // pred: ^bb3 %c7 = constant 7 : index %c56 = constant 56 : index br ^bb5(%c7 : index) ^bb5(%2: index): // 2 preds: ^bb4, ^bb6 %3 = cmpi "slt", %2, %c56 : index cond_br %3, ^bb6, ^bb7 ^bb6: // pred: ^bb5 call @body2(%0, %2) : (index, index) -> () %c2 = constant 2 : index %4 = addi %2, %c2 : index br ^bb5(%4 : index) ^bb7: // pred: ^bb5 call @mid(%0) : (index) -> () br ^bb8 ^bb8: // pred: ^bb7 %c18 = constant 18 : index %c37 = constant 37 : index br ^bb9(%c18 : index) ^bb9(%5: index): // 2 preds: ^bb8, ^bb10 %6 = cmpi "slt", %5, %c37 : index cond_br %6, ^bb10, ^bb11 ^bb10: // pred: ^bb9 call @body3(%0, %5) : (index, index) -> () %c3 = constant 3 : index %7 = addi %5, %c3 : index br ^bb9(%7 : index) ^bb11: // pred: ^bb9 call @post(%0) : (index) -> () %c1 = constant 1 : index 
%8 = addi %0, %c1 : index br ^bb2(%8 : index) ^bb12: // pred: ^bb2 return } // CHECK-LABEL: func @get_i64() -> !llvm.i64 func @get_i64() -> (i64) // CHECK-LABEL: func @get_f32() -> !llvm.float func @get_f32() -> (f32) // CHECK-LABEL: func @get_memref() -> !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }"> func @get_memref() -> (memref<42x?x10x?xf32>) // CHECK-LABEL: func @multireturn() -> !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> { func @multireturn() -> (i64, f32, memref<42x?x10x?xf32>) { ^bb0: // CHECK-NEXT: {{.*}} = llvm.call @get_i64() : () -> !llvm.i64 // CHECK-NEXT: {{.*}} = llvm.call @get_f32() : () -> !llvm.float // CHECK-NEXT: {{.*}} = llvm.call @get_memref() : () -> !llvm<"{ float*, float*, i64, [4 x i64], [4 x i64] }"> %0 = call @get_i64() : () -> (i64) %1 = call @get_f32() : () -> (f32) %2 = call @get_memref() : () -> (memref<42x?x10x?xf32>) // CHECK-NEXT: {{.*}} = llvm.mlir.undef : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> // CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> // CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> // CHECK-NEXT: {{.*}} = llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> // CHECK-NEXT: llvm.return {{.*}} : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> return %0, %1, %2 : i64, f32, memref<42x?x10x?xf32> } // CHECK-LABEL: func @multireturn_caller() { func @multireturn_caller() { ^bb0: // CHECK-NEXT: {{.*}} = llvm.call @multireturn() : () -> !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> // CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[0] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> // CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[1] : !llvm<"{ i64, float, { float*, float*, i64, 
[4 x i64], [4 x i64] } }"> // CHECK-NEXT: {{.*}} = llvm.extractvalue {{.*}}[2] : !llvm<"{ i64, float, { float*, float*, i64, [4 x i64], [4 x i64] } }"> %0:3 = call @multireturn() : () -> (i64, f32, memref<42x?x10x?xf32>) %1 = constant 42 : i64 // CHECK: {{.*}} = llvm.add {{.*}}, {{.*}} : !llvm.i64 %2 = addi %0#0, %1 : i64 %3 = constant 42.0 : f32 // CHECK: {{.*}} = llvm.fadd {{.*}}, {{.*}} : !llvm.float %4 = addf %0#1, %3 : f32 %5 = constant 0 : index return } // CHECK-LABEL: func @vector_ops(%arg0: !llvm<"<4 x float>">, %arg1: !llvm<"<4 x i1>">, %arg2: !llvm<"<4 x i64>">, %arg3: !llvm<"<4 x i64>">) -> !llvm<"<4 x float>"> { func @vector_ops(%arg0: vector<4xf32>, %arg1: vector<4xi1>, %arg2: vector<4xi64>, %arg3: vector<4xi64>) -> vector<4xf32> { // CHECK-NEXT: %0 = llvm.mlir.constant(dense<4.200000e+01> : vector<4xf32>) : !llvm<"<4 x float>"> %0 = constant dense<42.> : vector<4xf32> // CHECK-NEXT: %1 = llvm.fadd %arg0, %0 : !llvm<"<4 x float>"> %1 = addf %arg0, %0 : vector<4xf32> // CHECK-NEXT: %2 = llvm.sdiv %arg2, %arg2 : !llvm<"<4 x i64>"> %3 = divi_signed %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %3 = llvm.udiv %arg2, %arg2 : !llvm<"<4 x i64>"> %4 = divi_unsigned %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %4 = llvm.srem %arg2, %arg2 : !llvm<"<4 x i64>"> %5 = remi_signed %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %5 = llvm.urem %arg2, %arg2 : !llvm<"<4 x i64>"> %6 = remi_unsigned %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %6 = llvm.fdiv %arg0, %0 : !llvm<"<4 x float>"> %7 = divf %arg0, %0 : vector<4xf32> // CHECK-NEXT: %7 = llvm.frem %arg0, %0 : !llvm<"<4 x float>"> %8 = remf %arg0, %0 : vector<4xf32> // CHECK-NEXT: %8 = llvm.and %arg2, %arg3 : !llvm<"<4 x i64>"> %9 = and %arg2, %arg3 : vector<4xi64> // CHECK-NEXT: %9 = llvm.or %arg2, %arg3 : !llvm<"<4 x i64>"> %10 = or %arg2, %arg3 : vector<4xi64> // CHECK-NEXT: %10 = llvm.xor %arg2, %arg3 : !llvm<"<4 x i64>"> %11 = xor %arg2, %arg3 : vector<4xi64> // CHECK-NEXT: %11 = llvm.shl %arg2, %arg2 : !llvm<"<4 x 
i64>"> %12 = shift_left %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %12 = llvm.ashr %arg2, %arg2 : !llvm<"<4 x i64>"> %13 = shift_right_signed %arg2, %arg2 : vector<4xi64> // CHECK-NEXT: %13 = llvm.lshr %arg2, %arg2 : !llvm<"<4 x i64>"> %14 = shift_right_unsigned %arg2, %arg2 : vector<4xi64> return %1 : vector<4xf32> } // CHECK-LABEL: @ops func @ops(f32, f32, i32, i32, f64) -> (f32, i32) { ^bb0(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: i32, %arg4: f64): // CHECK-NEXT: %0 = llvm.fsub %arg0, %arg1 : !llvm.float %0 = subf %arg0, %arg1: f32 // CHECK-NEXT: %1 = llvm.sub %arg2, %arg3 : !llvm.i32 %1 = subi %arg2, %arg3: i32 // CHECK-NEXT: %2 = llvm.icmp "slt" %arg2, %1 : !llvm.i32 %2 = cmpi "slt", %arg2, %1 : i32 // CHECK-NEXT: %3 = llvm.sdiv %arg2, %arg3 : !llvm.i32 %4 = divi_signed %arg2, %arg3 : i32 // CHECK-NEXT: %4 = llvm.udiv %arg2, %arg3 : !llvm.i32 %5 = divi_unsigned %arg2, %arg3 : i32 // CHECK-NEXT: %5 = llvm.srem %arg2, %arg3 : !llvm.i32 %6 = remi_signed %arg2, %arg3 : i32 // CHECK-NEXT: %6 = llvm.urem %arg2, %arg3 : !llvm.i32 %7 = remi_unsigned %arg2, %arg3 : i32 // CHECK-NEXT: %7 = llvm.select %2, %arg2, %arg3 : !llvm.i1, !llvm.i32 %8 = select %2, %arg2, %arg3 : i32 // CHECK-NEXT: %8 = llvm.fdiv %arg0, %arg1 : !llvm.float %9 = divf %arg0, %arg1 : f32 // CHECK-NEXT: %9 = llvm.frem %arg0, %arg1 : !llvm.float %10 = remf %arg0, %arg1 : f32 // CHECK-NEXT: %10 = llvm.and %arg2, %arg3 : !llvm.i32 %11 = and %arg2, %arg3 : i32 // CHECK-NEXT: %11 = llvm.or %arg2, %arg3 : !llvm.i32 %12 = or %arg2, %arg3 : i32 // CHECK-NEXT: %12 = llvm.xor %arg2, %arg3 : !llvm.i32 %13 = xor %arg2, %arg3 : i32 // CHECK-NEXT: %13 = "llvm.intr.exp"(%arg0) : (!llvm.float) -> !llvm.float %14 = std.exp %arg0 : f32 // CHECK-NEXT: %14 = llvm.call @tanhf(%arg0) : (!llvm.float) -> !llvm.float %15 = std.tanh %arg0 : f32 // CHECK-NEXT: %15 = llvm.mlir.constant(7.900000e-01 : f64) : !llvm.double %16 = constant 7.9e-01 : f64 // CHECK-NEXT: %16 = llvm.call @tanh(%15) : (!llvm.double) -> !llvm.double 
%17 = std.tanh %16 : f64 // CHECK-NEXT: %17 = llvm.shl %arg2, %arg3 : !llvm.i32 %18 = shift_left %arg2, %arg3 : i32 // CHECK-NEXT: %18 = llvm.ashr %arg2, %arg3 : !llvm.i32 %19 = shift_right_signed %arg2, %arg3 : i32 // CHECK-NEXT: %19 = llvm.lshr %arg2, %arg3 : !llvm.i32 %20 = shift_right_unsigned %arg2, %arg3 : i32 // CHECK-NEXT: %{{[0-9]+}} = "llvm.intr.sqrt"(%arg0) : (!llvm.float) -> !llvm.float %21 = std.sqrt %arg0 : f32 // CHECK-NEXT: %{{[0-9]+}} = "llvm.intr.sqrt"(%arg4) : (!llvm.double) -> !llvm.double %22 = std.sqrt %arg4 : f64 return %0, %4 : f32, i32 } // Checking conversion of index types to integers using i1, assuming no target // system would have a 1-bit address space. Otherwise, we would have had to // make this test dependent on the pointer size on the target system. // CHECK-LABEL: @index_cast func @index_cast(%arg0: index, %arg1: i1) { // CHECK-NEXT: = llvm.trunc %arg0 : !llvm.i{{.*}} to !llvm.i1 %0 = index_cast %arg0: index to i1 // CHECK-NEXT: = llvm.sext %arg1 : !llvm.i1 to !llvm.i{{.*}} %1 = index_cast %arg1: i1 to index return } // Checking conversion of integer types to floating point. // CHECK-LABEL: @sitofp func @sitofp(%arg0 : i32, %arg1 : i64) { // CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.float %0 = sitofp %arg0: i32 to f32 // CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.double %1 = sitofp %arg0: i32 to f64 // CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.float %2 = sitofp %arg1: i64 to f32 // CHECK-NEXT: = llvm.sitofp {{.*}} : !llvm.i{{.*}} to !llvm.double %3 = sitofp %arg1: i64 to f64 return } // Checking extension of floating point types to wider floating point types. 
// CHECK-LABEL: @fpext func @fpext(%arg0 : f16, %arg1 : f32) { // CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.half to !llvm.float %0 = fpext %arg0: f16 to f32 // CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.half to !llvm.double %1 = fpext %arg0: f16 to f64 // CHECK-NEXT: = llvm.fpext {{.*}} : !llvm.float to !llvm.double %2 = fpext %arg1: f32 to f64 return } // Checking truncation of floating point types to narrower floating point types. // CHECK-LABEL: @fptrunc func @fptrunc(%arg0 : f32, %arg1 : f64) { // CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.float to !llvm.half %0 = fptrunc %arg0: f32 to f16 // CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.double to !llvm.half %1 = fptrunc %arg1: f64 to f16 // CHECK-NEXT: = llvm.fptrunc {{.*}} : !llvm.double to !llvm.float %2 = fptrunc %arg1: f64 to f32 return } // Check sign and zero extension and truncation of integers. // CHECK-LABEL: @integer_extension_and_truncation func @integer_extension_and_truncation() { // CHECK-NEXT: %0 = llvm.mlir.constant(-3 : i3) : !llvm.i3 %0 = constant 5 : i3 // CHECK-NEXT: = llvm.sext %0 : !llvm.i3 to !llvm.i6 %1 = sexti %0 : i3 to i6 // CHECK-NEXT: = llvm.zext %0 : !llvm.i3 to !llvm.i6 %2 = zexti %0 : i3 to i6 // CHECK-NEXT: = llvm.trunc %0 : !llvm.i3 to !llvm.i2 %3 = trunci %0 : i3 to i2 return } // CHECK-LABEL: @dfs_block_order func @dfs_block_order(%arg0: i32) -> (i32) { // CHECK-NEXT: %[[CST:.*]] = llvm.mlir.constant(42 : i32) : !llvm.i32 %0 = constant 42 : i32 // CHECK-NEXT: llvm.br ^bb2 br ^bb2 // CHECK-NEXT: ^bb1: // CHECK-NEXT: %[[ADD:.*]] = llvm.add %arg0, %[[CST]] : !llvm.i32 // CHECK-NEXT: llvm.return %[[ADD]] : !llvm.i32 ^bb1: %2 = addi %arg0, %0 : i32 return %2 : i32 // CHECK-NEXT: ^bb2: ^bb2: // CHECK-NEXT: llvm.br ^bb1 br ^bb1 } // CHECK-LABEL: func @cond_br_same_target(%arg0: !llvm.i1, %arg1: !llvm.i32, %arg2: !llvm.i32) func @cond_br_same_target(%arg0: i1, %arg1: i32, %arg2 : i32) -> (i32) { // CHECK-NEXT: llvm.cond_br %arg0, ^[[origBlock:bb[0-9]+]](%arg1 : !llvm.i32), ^[[dummyBlock:bb[0-9]+]] cond_br %arg0, 
^bb1(%arg1 : i32), ^bb1(%arg2 : i32) // CHECK: ^[[origBlock]](%0: !llvm.i32): // CHECK-NEXT: llvm.return %0 : !llvm.i32 ^bb1(%0 : i32): return %0 : i32 // CHECK: ^[[dummyBlock]]: // CHECK-NEXT: llvm.br ^[[origBlock]](%arg2 : !llvm.i32) } // CHECK-LABEL: func @fcmp(%arg0: !llvm.float, %arg1: !llvm.float) { func @fcmp(f32, f32) -> () { ^bb0(%arg0: f32, %arg1: f32): // CHECK: llvm.fcmp "oeq" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ogt" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "oge" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "olt" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ole" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "one" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ord" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ueq" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ugt" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "uge" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ult" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "ule" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "une" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.fcmp "uno" %arg0, %arg1 : !llvm.float // CHECK-NEXT: llvm.return %1 = cmpf "oeq", %arg0, %arg1 : f32 %2 = cmpf "ogt", %arg0, %arg1 : f32 %3 = cmpf "oge", %arg0, %arg1 : f32 %4 = cmpf "olt", %arg0, %arg1 : f32 %5 = cmpf "ole", %arg0, %arg1 : f32 %6 = cmpf "one", %arg0, %arg1 : f32 %7 = cmpf "ord", %arg0, %arg1 : f32 %8 = cmpf "ueq", %arg0, %arg1 : f32 %9 = cmpf "ugt", %arg0, %arg1 : f32 %10 = cmpf "uge", %arg0, %arg1 : f32 %11 = cmpf "ult", %arg0, %arg1 : f32 %12 = cmpf "ule", %arg0, %arg1 : f32 %13 = cmpf "une", %arg0, %arg1 : f32 %14 = cmpf "uno", %arg0, %arg1 : f32 return } // CHECK-LABEL: @vec_bin func @vec_bin(%arg0: vector<2x2x2xf32>) -> vector<2x2x2xf32> { %0 = addf %arg0, %arg0 : vector<2x2x2xf32> return %0 : vector<2x2x2xf32> // CHECK-NEXT: llvm.mlir.undef : !llvm<"[2 x [2 x <2 x float>]]"> // This block appears 2x2 times // CHECK-NEXT: 
llvm.extractvalue %{{.*}}[0, 0] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK-NEXT: llvm.extractvalue %{{.*}}[0, 0] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK-NEXT: llvm.fadd %{{.*}} : !llvm<"<2 x float>"> // CHECK-NEXT: llvm.insertvalue %{{.*}}[0, 0] : !llvm<"[2 x [2 x <2 x float>]]"> // We check the proper indexing of extract/insert in the remaining 3 positions. // CHECK: llvm.extractvalue %{{.*}}[0, 1] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK: llvm.insertvalue %{{.*}}[0, 1] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK: llvm.extractvalue %{{.*}}[1, 0] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK: llvm.insertvalue %{{.*}}[1, 0] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK: llvm.extractvalue %{{.*}}[1, 1] : !llvm<"[2 x [2 x <2 x float>]]"> // CHECK: llvm.insertvalue %{{.*}}[1, 1] : !llvm<"[2 x [2 x <2 x float>]]"> // And we're done // CHECK-NEXT: return } // CHECK-LABEL: @splat // CHECK-SAME: %[[A:arg[0-9]+]]: !llvm<"<4 x float>"> // CHECK-SAME: %[[ELT:arg[0-9]+]]: !llvm.float func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> { %vb = splat %b : vector<4xf32> %r = mulf %a, %vb : vector<4xf32> return %r : vector<4xf32> } // CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : !llvm<"<4 x float>"> // CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : !llvm.i32 // CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : !llvm.i32] : !llvm<"<4 x float>"> // CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] // CHECK-NEXT: %[[SCALE:[0-9]+]] = llvm.fmul %[[A]], %[[SPLAT]] : !llvm<"<4 x float>"> // CHECK-NEXT: llvm.return %[[SCALE]] : !llvm<"<4 x float>"> // CHECK-LABEL: func @view( // CHECK: %[[ARG0:.*]]: !llvm.i64, %[[ARG1:.*]]: !llvm.i64, %[[ARG2:.*]]: !llvm.i64 func @view(%arg0 : index, %arg1 : index, %arg2 : index) { // CHECK: llvm.mlir.constant(2048 : index) : !llvm.i64 // CHECK: llvm.mlir.undef : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> %0 = alloc() : memref<2048xi8> 
// Test two dynamic sizes and dynamic offset. // CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> // CHECK: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG2]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %1 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s0 + d1 + s1)>> // Test two dynamic sizes and static offset. 
// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> // CHECK: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %2 = view %0[][%arg0, %arg1] : memref<2048xi8> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * s0 + d1)>> // Test one dynamic size and dynamic offset. 
// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> // CHECK: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG2]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mul %{{.*}}, %[[ARG1]] // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %3 = view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s0 + d1 + s1)>> // Test one dynamic size and static offset. 
// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> // CHECK: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(16 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %4 = view %0[][%arg0] : memref<2048xi8> to memref<?x16xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> // Test static sizes and static offset. 
// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> // CHECK: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(64 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mlir.constant(4 : index) : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %5 = view %0[][] : memref<2048xi8> to memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> // Test dynamic everything. 
// CHECK: llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm<"{ i8*, i8*, i64, [1 x i64], [1 x i64] }"> // CHECK: llvm.bitcast %{{.*}} : !llvm<"i8*"> to !llvm<"float*"> // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG2]], %{{.*}}[2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG1]], %{{.*}}[3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64 // CHECK: llvm.insertvalue %[[STRIDE_1]], %{{.*}}[4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.insertvalue %[[ARG0]], %{{.*}}[3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: llvm.mul %[[STRIDE_1]], %[[ARG1]] : !llvm.i64 // CHECK: llvm.insertvalue %{{.*}}, %{{.*}}[4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %6 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s0 + d1 + s1)>> return } // CHECK-LABEL: func @subview( // CHECK-COUNT-2: !llvm<"float*">, // CHECK-COUNT-5: {{%[a-zA-Z0-9]*}}: !llvm.i64, // CHECK: %[[ARG0:[a-zA-Z0-9]*]]: !llvm.i64, // CHECK: %[[ARG1:[a-zA-Z0-9]*]]: !llvm.i64, // CHECK: %[[ARG2:.*]]: !llvm.i64) func @subview(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg0 : index, %arg1 : index, %arg2 : index) { // The last "insertvalue" that populates the memref descriptor from the function arguments. 
// CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64 // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64 // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %1 = subview %0[%arg0, %arg1][%arg0, %arg1][%arg0, %arg1] : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> to memref<?x?xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0)>> return } 
// CHECK-LABEL: func @subview_const_size( func @subview_const_size(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg0 : index, %arg1 : index, %arg2 : index) { // The last "insertvalue" that populates the memref descriptor from the function arguments. // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK: %[[CST2:.*]] = llvm.mlir.constant(2 : i64) // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64 // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64 // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[CST2]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESCSTRIDE1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[DESCSTRIDE1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[CST4]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x 
i64], [2 x i64] }"> // CHECK: %[[DESCSTRIDE0:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 // CHECK: llvm.insertvalue %[[DESCSTRIDE0]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %1 = subview %0[%arg0, %arg1][][%arg0, %arg1] : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> to memref<4x2xf32, affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0)>> return } // CHECK-LABEL: func @subview_const_stride( func @subview_const_stride(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>, %arg0 : index, %arg1 : index, %arg2 : index) { // The last "insertvalue" that populates the memref descriptor from the function arguments. // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFF:.*]] = llvm.extractvalue %[[MEMREF]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[OFFINC:.*]] = llvm.mul %[[ARG0]], %[[STRIDE0]] : !llvm.i64 // CHECK: %[[OFF1:.*]] = llvm.add %[[OFF]], %[[OFFINC]] : !llvm.i64 // CHECK: %[[OFFINC1:.*]] = llvm.mul %[[ARG1]], %[[STRIDE1]] : !llvm.i64 // CHECK: %[[OFF2:.*]] = llvm.add %[[OFF1]], %[[OFFINC1]] : !llvm.i64 // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[OFF2]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[ARG1]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: 
%[[CST2:.*]] = llvm.mlir.constant(2 : i64) // CHECK: %[[DESC4:.*]] = llvm.insertvalue %[[CST2]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[ARG0]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %1 = subview %0[%arg0, %arg1][%arg0, %arg1][] : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> to memref<?x?xf32, affine_map<(d0, d1)[s0] -> (d0 * 4 + d1 * 2 + s0)>> return } // CHECK-LABEL: func @subview_const_stride_and_offset( func @subview_const_stride_and_offset(%0 : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>>) { // The last "insertvalue" that populates the memref descriptor from the function arguments. // CHECK: %[[MEMREF:.*]] = llvm.insertvalue %{{.*}}, %{{.*}}[4, 1] // CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC0:.*]] = llvm.insertvalue %{{.*}}, %[[DESC]][0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC1:.*]] = llvm.insertvalue %{{.*}}, %[[DESC0]][1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE0:.*]] = llvm.extractvalue %[[MEMREF]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[STRIDE1:.*]] = llvm.extractvalue %[[MEMREF]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[CST62:.*]] = llvm.mlir.constant(62 : i64) // CHECK: %[[CST3:.*]] = llvm.mlir.constant(3 : i64) // CHECK: %[[CST8:.*]] = llvm.mlir.constant(8 : index) // CHECK: %[[DESC2:.*]] = llvm.insertvalue %[[CST8]], %[[DESC1]][2] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC3:.*]] = llvm.insertvalue %[[CST3]], %[[DESC2]][3, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[CST1:.*]] = llvm.mlir.constant(1 : i64) // CHECK: 
%[[DESC4:.*]] = llvm.insertvalue %[[CST1]], %[[DESC3]][4, 1] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[DESC5:.*]] = llvm.insertvalue %[[CST62]], %[[DESC4]][3, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> // CHECK: %[[CST4:.*]] = llvm.mlir.constant(4 : i64) // CHECK: llvm.insertvalue %[[CST4]], %[[DESC5]][4, 0] : !llvm<"{ float*, float*, i64, [2 x i64], [2 x i64] }"> %1 = subview %0[][][] : memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> to memref<62x3xf32, affine_map<(d0, d1) -> (d0 * 4 + d1 + 8)>> return } // ----- module { func @check_tanh_func_added_only_once_to_symbol_table(%f: f32, %lf: f64) -> () { %f0 = std.tanh %f : f32 %f1 = std.tanh %f0 : f32 %lf0 = std.tanh %lf : f64 %lf1 = std.tanh %lf0 : f64 return } // CHECK: module { // CHECK: llvm.func @tanh(!llvm.double) -> !llvm.double // CHECK: llvm.func @tanhf(!llvm.float) -> !llvm.float // CHECK-LABEL: func @check_tanh_func_added_only_once_to_symbol_table } // ----- +// CHECK-LABEL: func @atomic_rmw +func @atomic_rmw(%I : memref<10xi32>, %ival : i32, %F : memref<10xf32>, %fval : f32, %i : index) { + atomic_rmw "assign" %fval, %F[%i] : (f32, memref<10xf32>) -> f32 + // CHECK: llvm.atomicrmw xchg %{{.*}}, %{{.*}} acq_rel + atomic_rmw "addi" %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw add %{{.*}}, %{{.*}} acq_rel + atomic_rmw "maxs" %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw max %{{.*}}, %{{.*}} acq_rel + atomic_rmw "mins" %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw min %{{.*}}, %{{.*}} acq_rel + atomic_rmw "maxu" %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw umax %{{.*}}, %{{.*}} acq_rel + atomic_rmw "minu" %ival, %I[%i] : (i32, memref<10xi32>) -> i32 + // CHECK: llvm.atomicrmw umin %{{.*}}, %{{.*}} acq_rel + atomic_rmw "addf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32 + // CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} acq_rel + return +} + +// ----- + 
+// CHECK-LABEL: func @cmpxchg +func @cmpxchg(%F : memref<10xf32>, %fval : f32, %i : index) -> f32 { + %x = atomic_rmw "maxf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32 + // CHECK: %[[init:.*]] = llvm.load %{{.*}} : !llvm<"float*"> + // CHECK-NEXT: llvm.br ^bb1(%[[init]] : !llvm.float) + // CHECK-NEXT: ^bb1(%[[loaded:.*]]: !llvm.float): + // CHECK-NEXT: %[[cmp:.*]] = llvm.fcmp "ogt" %[[loaded]], %{{.*}} : !llvm.float + // CHECK-NEXT: %[[max:.*]] = llvm.select %[[cmp]], %[[loaded]], %{{.*}} : !llvm.i1, !llvm.float + // CHECK-NEXT: %[[pair:.*]] = llvm.cmpxchg %{{.*}}, %[[loaded]], %[[max]] acq_rel monotonic : !llvm.float + // CHECK-NEXT: %[[new:.*]] = llvm.extractvalue %[[pair]][0] : !llvm<"{ float, i1 }"> + // CHECK-NEXT: %[[ok:.*]] = llvm.extractvalue %[[pair]][1] : !llvm<"{ float, i1 }"> + // CHECK-NEXT: llvm.cond_br %[[ok]], ^bb2, ^bb1(%[[new]] : !llvm.float) + // CHECK-NEXT: ^bb2: + return %x : f32 + // CHECK-NEXT: llvm.return %[[new]] +} + +// ----- + // CHECK-LABEL: func @assume_alignment func @assume_alignment(%0 : memref<4x4xf16>) { // CHECK: %[[PTR:.*]] = llvm.extractvalue %[[MEMREF:.*]][1] : !llvm<"{ half*, half*, i64, [2 x i64], [2 x i64] }"> // CHECK-NEXT: %[[ZERO:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 // CHECK-NEXT: %[[MASK:.*]] = llvm.mlir.constant(15 : index) : !llvm.i64 // CHECK-NEXT: %[[INT:.*]] = llvm.ptrtoint %[[PTR]] : !llvm<"half*"> to !llvm.i64 // CHECK-NEXT: %[[MASKED_PTR:.*]] = llvm.and %[[INT]], %[[MASK]] : !llvm.i64 // CHECK-NEXT: %[[CONDITION:.*]] = llvm.icmp "eq" %[[MASKED_PTR]], %[[ZERO]] : !llvm.i64 // CHECK-NEXT: "llvm.intr.assume"(%[[CONDITION]]) : (!llvm.i1) -> () assume_alignment %0, 16 : memref<4x4xf16> return } diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir index 382b1602df0d..c07931f01f8c 100644 --- a/mlir/test/IR/core-ops.mlir +++ b/mlir/test/IR/core-ops.mlir @@ -1,750 +1,757 @@ // RUN: mlir-opt %s | FileCheck %s // Verify the printed output can be parsed. 
// RUN: mlir-opt %s | mlir-opt | FileCheck %s // Verify the generic form can be parsed. // RUN: mlir-opt -mlir-print-op-generic %s | mlir-opt | FileCheck %s // CHECK: #map0 = affine_map<(d0) -> (d0 + 1)> // CHECK: #map1 = affine_map<()[s0] -> (s0 + 1)> // CHECK-DAG: #[[VIEW_MAP1:map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 4 + d1)> // CHECK-DAG: #[[VIEW_MAP2:map[0-9]+]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + d1 + s0)> // CHECK-DAG: #[[VIEW_MAP3:map[0-9]+]] = affine_map<(d0, d1)[s0] -> (d0 * s0 + d1)> // CHECK-DAG: #[[BASE_MAP0:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)> // CHECK-DAG: #[[BASE_MAP3:map[0-9]+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)> // CHECK-DAG: #[[SUBVIEW_MAP0:map[0-9]+]] = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + d1 * s2 + d2 * s3 + s0)> // CHECK-DAG: #[[BASE_MAP1:map[0-9]+]] = affine_map<(d0)[s0] -> (d0 + s0)> // CHECK-DAG: #[[SUBVIEW_MAP1:map[0-9]+]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)> // CHECK-DAG: #[[BASE_MAP2:map[0-9]+]] = affine_map<(d0, d1) -> (d0 * 22 + d1)> // CHECK-DAG: #[[SUBVIEW_MAP2:map[0-9]+]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + d1 * s2 + s0)> // CHECK-DAG: #[[SUBVIEW_MAP3:map[0-9]+]] = affine_map<(d0, d1, d2) -> (d0 * 16 + d1 * 4 + d2 + 8)> // CHECK-DAG: #[[SUBVIEW_MAP4:map[0-9]+]] = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> // CHECK-DAG: #[[SUBVIEW_MAP5:map[0-9]+]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1 * 2)> // CHECK-LABEL: func @func_with_ops(%arg0: f32) { func @func_with_ops(f32) { ^bb0(%a : f32): // CHECK: %0 = "getTensor"() : () -> tensor<4x4x?xf32> %t = "getTensor"() : () -> tensor<4x4x?xf32> // CHECK: %1 = dim %0, 2 : tensor<4x4x?xf32> %t2 = "std.dim"(%t){index = 2} : (tensor<4x4x?xf32>) -> index // CHECK: %2 = addf %arg0, %arg0 : f32 %x = "std.addf"(%a, %a) : (f32,f32) -> (f32) // CHECK: return return } // CHECK-LABEL: func @standard_instrs(%arg0: tensor<4x4x?xf32>, %arg1: f32, %arg2: i32, %arg3: 
index, %arg4: i64, %arg5: f16) { func @standard_instrs(tensor<4x4x?xf32>, f32, i32, index, i64, f16) { ^bb42(%t: tensor<4x4x?xf32>, %f: f32, %i: i32, %idx : index, %j: i64, %half: f16): // CHECK: %0 = dim %arg0, 2 : tensor<4x4x?xf32> %a = "std.dim"(%t){index = 2} : (tensor<4x4x?xf32>) -> index // CHECK: %1 = dim %arg0, 2 : tensor<4x4x?xf32> %a2 = dim %t, 2 : tensor<4x4x?xf32> // CHECK: %2 = addf %arg1, %arg1 : f32 %f2 = "std.addf"(%f, %f) : (f32,f32) -> f32 // CHECK: %3 = addf %2, %2 : f32 %f3 = addf %f2, %f2 : f32 // CHECK: %4 = addi %arg2, %arg2 : i32 %i2 = "std.addi"(%i, %i) : (i32,i32) -> i32 // CHECK: %5 = addi %4, %4 : i32 %i3 = addi %i2, %i2 : i32 // CHECK: %{{[0-9]+}} = addi %arg3, %arg3 : index %idx1 = addi %idx, %idx : index // CHECK: %{{[0-9]+}} = addi %arg3, %{{[0-9]+}} : index %idx2 = "std.addi"(%idx, %idx1) : (index, index) -> index // CHECK: %8 = subf %arg1, %arg1 : f32 %f4 = "std.subf"(%f, %f) : (f32,f32) -> f32 // CHECK: %9 = subf %8, %8 : f32 %f5 = subf %f4, %f4 : f32 // CHECK: %10 = subi %arg2, %arg2 : i32 %i4 = "std.subi"(%i, %i) : (i32,i32) -> i32 // CHECK: %11 = subi %10, %10 : i32 %i5 = subi %i4, %i4 : i32 // CHECK: %12 = mulf %2, %2 : f32 %f6 = mulf %f2, %f2 : f32 // CHECK: %13 = muli %4, %4 : i32 %i6 = muli %i2, %i2 : i32 // CHECK: %c42_i32 = constant 42 : i32 %x = "std.constant"(){value = 42 : i32} : () -> i32 // CHECK: %c42_i32_0 = constant 42 : i32 %7 = constant 42 : i32 // CHECK: %c43 = constant {crazy = "std.foo"} 43 : index %8 = constant {crazy = "std.foo"} 43: index // CHECK: %cst = constant 4.300000e+01 : bf16 %9 = constant 43.0 : bf16 // CHECK: %f = constant @func_with_ops : (f32) -> () %10 = constant @func_with_ops : (f32) -> () // CHECK: %f_1 = constant @affine_apply : () -> () %11 = constant @affine_apply : () -> () // CHECK: %f_2 = constant @affine_apply : () -> () %12 = constant @affine_apply : () -> () // CHECK: %cst_3 = constant dense<0> : vector<4xi32> %13 = constant dense<0> : vector<4 x i32> // CHECK: %cst_4 = constant 
dense<0> : tensor<42xi32> %tci32 = constant dense<0> : tensor<42 x i32> // CHECK: %cst_5 = constant dense<0> : vector<42xi32> %vci32 = constant dense<0> : vector<42 x i32> // CHECK: %{{[0-9]+}} = cmpi "eq", %{{[0-9]+}}, %{{[0-9]+}} : i32 %14 = cmpi "eq", %i3, %i4 : i32 // Predicate 1 means inequality comparison. // CHECK: %{{[0-9]+}} = cmpi "ne", %{{[0-9]+}}, %{{[0-9]+}} : i32 %15 = "std.cmpi"(%i3, %i4) {predicate = 1} : (i32, i32) -> i1 // CHECK: %{{[0-9]+}} = cmpi "slt", %cst_3, %cst_3 : vector<4xi32> %16 = cmpi "slt", %13, %13 : vector<4 x i32> // CHECK: %{{[0-9]+}} = cmpi "ne", %cst_3, %cst_3 : vector<4xi32> %17 = "std.cmpi"(%13, %13) {predicate = 1} : (vector<4 x i32>, vector<4 x i32>) -> vector<4 x i1> // CHECK: %{{[0-9]+}} = cmpi "slt", %arg3, %arg3 : index %18 = cmpi "slt", %idx, %idx : index // CHECK: %{{[0-9]+}} = cmpi "eq", %cst_4, %cst_4 : tensor<42xi32> %19 = cmpi "eq", %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = cmpi "eq", %cst_5, %cst_5 : vector<42xi32> %20 = cmpi "eq", %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = select %{{[0-9]+}}, %arg3, %arg3 : index %21 = select %18, %idx, %idx : index // CHECK: %{{[0-9]+}} = select %{{[0-9]+}}, %cst_4, %cst_4 : tensor<42xi32> %22 = select %19, %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = select %{{[0-9]+}}, %cst_5, %cst_5 : vector<42xi32> %23 = select %20, %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = select %{{[0-9]+}}, %arg3, %arg3 : index %24 = "std.select"(%18, %idx, %idx) : (i1, index, index) -> index // CHECK: %{{[0-9]+}} = select %{{[0-9]+}}, %cst_4, %cst_4 : tensor<42xi32> %25 = "std.select"(%19, %tci32, %tci32) : (tensor<42 x i1>, tensor<42 x i32>, tensor<42 x i32>) -> tensor<42 x i32> // CHECK: %{{[0-9]+}} = divi_signed %arg2, %arg2 : i32 %26 = divi_signed %i, %i : i32 // CHECK: %{{[0-9]+}} = divi_signed %arg3, %arg3 : index %27 = divi_signed %idx, %idx : index // CHECK: %{{[0-9]+}} = divi_signed %cst_5, %cst_5 : vector<42xi32> %28 = divi_signed 
%vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = divi_signed %cst_4, %cst_4 : tensor<42xi32> %29 = divi_signed %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = divi_signed %arg2, %arg2 : i32 %30 = "std.divi_signed"(%i, %i) : (i32, i32) -> i32 // CHECK: %{{[0-9]+}} = divi_unsigned %arg2, %arg2 : i32 %31 = divi_unsigned %i, %i : i32 // CHECK: %{{[0-9]+}} = divi_unsigned %arg3, %arg3 : index %32 = divi_unsigned %idx, %idx : index // CHECK: %{{[0-9]+}} = divi_unsigned %cst_5, %cst_5 : vector<42xi32> %33 = divi_unsigned %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = divi_unsigned %cst_4, %cst_4 : tensor<42xi32> %34 = divi_unsigned %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = divi_unsigned %arg2, %arg2 : i32 %35 = "std.divi_unsigned"(%i, %i) : (i32, i32) -> i32 // CHECK: %{{[0-9]+}} = remi_signed %arg2, %arg2 : i32 %36 = remi_signed %i, %i : i32 // CHECK: %{{[0-9]+}} = remi_signed %arg3, %arg3 : index %37 = remi_signed %idx, %idx : index // CHECK: %{{[0-9]+}} = remi_signed %cst_5, %cst_5 : vector<42xi32> %38 = remi_signed %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = remi_signed %cst_4, %cst_4 : tensor<42xi32> %39 = remi_signed %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = remi_signed %arg2, %arg2 : i32 %40 = "std.remi_signed"(%i, %i) : (i32, i32) -> i32 // CHECK: %{{[0-9]+}} = remi_unsigned %arg2, %arg2 : i32 %41 = remi_unsigned %i, %i : i32 // CHECK: %{{[0-9]+}} = remi_unsigned %arg3, %arg3 : index %42 = remi_unsigned %idx, %idx : index // CHECK: %{{[0-9]+}} = remi_unsigned %cst_5, %cst_5 : vector<42xi32> %43 = remi_unsigned %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = remi_unsigned %cst_4, %cst_4 : tensor<42xi32> %44 = remi_unsigned %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = remi_unsigned %arg2, %arg2 : i32 %45 = "std.remi_unsigned"(%i, %i) : (i32, i32) -> i32 // CHECK: %{{[0-9]+}} = divf %arg1, %arg1 : f32 %46 = "std.divf"(%f, %f) : (f32,f32) -> f32 // CHECK: %{{[0-9]+}} = 
divf %arg1, %arg1 : f32 %47 = divf %f, %f : f32 // CHECK: %{{[0-9]+}} = divf %arg0, %arg0 : tensor<4x4x?xf32> %48 = divf %t, %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = remf %arg1, %arg1 : f32 %49 = "std.remf"(%f, %f) : (f32,f32) -> f32 // CHECK: %{{[0-9]+}} = remf %arg1, %arg1 : f32 %50 = remf %f, %f : f32 // CHECK: %{{[0-9]+}} = remf %arg0, %arg0 : tensor<4x4x?xf32> %51 = remf %t, %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = and %arg2, %arg2 : i32 %52 = "std.and"(%i, %i) : (i32,i32) -> i32 // CHECK: %{{[0-9]+}} = and %arg2, %arg2 : i32 %53 = and %i, %i : i32 // CHECK: %{{[0-9]+}} = and %cst_5, %cst_5 : vector<42xi32> %54 = std.and %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = and %cst_4, %cst_4 : tensor<42xi32> %55 = and %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = or %arg2, %arg2 : i32 %56 = "std.or"(%i, %i) : (i32,i32) -> i32 // CHECK: %{{[0-9]+}} = or %arg2, %arg2 : i32 %57 = or %i, %i : i32 // CHECK: %{{[0-9]+}} = or %cst_5, %cst_5 : vector<42xi32> %58 = std.or %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = or %cst_4, %cst_4 : tensor<42xi32> %59 = or %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = xor %arg2, %arg2 : i32 %60 = "std.xor"(%i, %i) : (i32,i32) -> i32 // CHECK: %{{[0-9]+}} = xor %arg2, %arg2 : i32 %61 = xor %i, %i : i32 // CHECK: %{{[0-9]+}} = xor %cst_5, %cst_5 : vector<42xi32> %62 = std.xor %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = xor %cst_4, %cst_4 : tensor<42xi32> %63 = xor %tci32, %tci32 : tensor<42 x i32> %64 = constant dense<0.> : vector<4 x f32> %tcf32 = constant dense<0.> : tensor<42 x f32> %vcf32 = constant dense<0.> : vector<4 x f32> // CHECK: %{{[0-9]+}} = cmpf "ogt", %{{[0-9]+}}, %{{[0-9]+}} : f32 %65 = cmpf "ogt", %f3, %f4 : f32 // Predicate 1 means ordered equality comparison. 
// CHECK: %{{[0-9]+}} = cmpf "oeq", %{{[0-9]+}}, %{{[0-9]+}} : f32 %66 = "std.cmpf"(%f3, %f4) {predicate = 1} : (f32, f32) -> i1 // CHECK: %{{[0-9]+}} = cmpf "olt", %cst_8, %cst_8 : vector<4xf32> %67 = cmpf "olt", %vcf32, %vcf32 : vector<4 x f32> // CHECK: %{{[0-9]+}} = cmpf "oeq", %cst_8, %cst_8 : vector<4xf32> %68 = "std.cmpf"(%vcf32, %vcf32) {predicate = 1} : (vector<4 x f32>, vector<4 x f32>) -> vector<4 x i1> // CHECK: %{{[0-9]+}} = cmpf "oeq", %cst_7, %cst_7 : tensor<42xf32> %69 = cmpf "oeq", %tcf32, %tcf32 : tensor<42 x f32> // CHECK: %{{[0-9]+}} = cmpf "oeq", %cst_8, %cst_8 : vector<4xf32> %70 = cmpf "oeq", %vcf32, %vcf32 : vector<4 x f32> // CHECK: %{{[0-9]+}} = rank %arg0 : tensor<4x4x?xf32> %71 = "std.rank"(%t) : (tensor<4x4x?xf32>) -> index // CHECK: %{{[0-9]+}} = rank %arg0 : tensor<4x4x?xf32> %72 = rank %t : tensor<4x4x?xf32> // CHECK: = constant unit %73 = constant unit // CHECK: constant true %74 = constant true // CHECK: constant false %75 = constant false // CHECK: = index_cast {{.*}} : index to i64 %76 = index_cast %idx : index to i64 // CHECK: = index_cast {{.*}} : i32 to index %77 = index_cast %i : i32 to index // CHECK: = sitofp {{.*}} : i32 to f32 %78 = sitofp %i : i32 to f32 // CHECK: = sitofp {{.*}} : i32 to f64 %79 = sitofp %i : i32 to f64 // CHECK: = sitofp {{.*}} : i64 to f32 %80 = sitofp %j : i64 to f32 // CHECK: = sitofp {{.*}} : i64 to f64 %81 = sitofp %j : i64 to f64 // CHECK: = sexti %arg2 : i32 to i64 %82 = "std.sexti"(%i) : (i32) -> i64 // CHECK: = sexti %arg2 : i32 to i64 %83 = sexti %i : i32 to i64 // CHECK: %{{[0-9]+}} = sexti %cst_5 : vector<42xi32> %84 = sexti %vci32 : vector<42 x i32> to vector<42 x i64> // CHECK: %{{[0-9]+}} = sexti %cst_4 : tensor<42xi32> %85 = sexti %tci32 : tensor<42 x i32> to tensor<42 x i64> // CHECK: = zexti %arg2 : i32 to i64 %86 = "std.zexti"(%i) : (i32) -> i64 // CHECK: = zexti %arg2 : i32 to i64 %87 = zexti %i : i32 to i64 // CHECK: %{{[0-9]+}} = zexti %cst_5 : vector<42xi32> %88 = zexti %vci32 : 
vector<42 x i32> to vector<42 x i64> // CHECK: %{{[0-9]+}} = zexti %cst_4 : tensor<42xi32> %89 = zexti %tci32 : tensor<42 x i32> to tensor<42 x i64> // CHECK: = trunci %arg2 : i32 to i16 %90 = "std.trunci"(%i) : (i32) -> i16 // CHECK: = trunci %arg2 : i32 to i16 %91 = trunci %i : i32 to i16 // CHECK: %{{[0-9]+}} = trunci %cst_5 : vector<42xi32> %92 = trunci %vci32 : vector<42 x i32> to vector<42 x i16> // CHECK: %{{[0-9]+}} = trunci %cst_4 : tensor<42xi32> %93 = trunci %tci32 : tensor<42 x i32> to tensor<42 x i16> // CHECK: = fpext {{.*}} : f16 to f32 %94 = fpext %half : f16 to f32 // CHECK: = fptrunc {{.*}} : f32 to f16 %95 = fptrunc %f : f32 to f16 // CHECK: %{{[0-9]+}} = exp %arg1 : f32 %96 = "std.exp"(%f) : (f32) -> f32 // CHECK: %{{[0-9]+}} = exp %arg1 : f32 %97 = exp %f : f32 // CHECK: %{{[0-9]+}} = exp %cst_8 : vector<4xf32> %98 = exp %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = exp %arg0 : tensor<4x4x?xf32> %99 = exp %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = absf %arg1 : f32 %100 = "std.absf"(%f) : (f32) -> f32 // CHECK: %{{[0-9]+}} = absf %arg1 : f32 %101 = absf %f : f32 // CHECK: %{{[0-9]+}} = absf %cst_8 : vector<4xf32> %102 = absf %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = absf %arg0 : tensor<4x4x?xf32> %103 = absf %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = ceilf %arg1 : f32 %104 = "std.ceilf"(%f) : (f32) -> f32 // CHECK: %{{[0-9]+}} = ceilf %arg1 : f32 %105 = ceilf %f : f32 // CHECK: %{{[0-9]+}} = ceilf %cst_8 : vector<4xf32> %106 = ceilf %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = ceilf %arg0 : tensor<4x4x?xf32> %107 = ceilf %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = cos %arg1 : f32 %108 = "std.cos"(%f) : (f32) -> f32 // CHECK: %{{[0-9]+}} = cos %arg1 : f32 %109 = cos %f : f32 // CHECK: %{{[0-9]+}} = cos %cst_8 : vector<4xf32> %110 = cos %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = cos %arg0 : tensor<4x4x?xf32> %111 = cos %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = negf %arg1 : f32 %112 = "std.negf"(%f) : (f32) -> f32 // 
CHECK: %{{[0-9]+}} = negf %arg1 : f32 %113 = negf %f : f32 // CHECK: %{{[0-9]+}} = negf %cst_8 : vector<4xf32> %114 = negf %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = negf %arg0 : tensor<4x4x?xf32> %115 = negf %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = copysign %arg1, %arg1 : f32 %116 = "std.copysign"(%f, %f) : (f32, f32) -> f32 // CHECK: %{{[0-9]+}} = copysign %arg1, %arg1 : f32 %117 = copysign %f, %f : f32 // CHECK: %{{[0-9]+}} = copysign %cst_8, %cst_8 : vector<4xf32> %118 = copysign %vcf32, %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = copysign %arg0, %arg0 : tensor<4x4x?xf32> %119 = copysign %t, %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = tanh %arg1 : f32 %120 = "std.tanh"(%f) : (f32) -> f32 // CHECK: %{{[0-9]+}} = tanh %arg1 : f32 %121 = tanh %f : f32 // CHECK: %{{[0-9]+}} = tanh %cst_8 : vector<4xf32> %122 = tanh %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = tanh %arg0 : tensor<4x4x?xf32> %123 = tanh %t : tensor<4x4x?xf32> // CHECK: %{{[0-9]+}} = shift_left %arg2, %arg2 : i32 %124 = "std.shift_left"(%i, %i) : (i32, i32) -> i32 // CHECK:%{{[0-9]+}} = shift_left %4, %4 : i32 %125 = shift_left %i2, %i2 : i32 // CHECK: %{{[0-9]+}} = shift_left %arg3, %arg3 : index %126 = shift_left %idx, %idx : index // CHECK: %{{[0-9]+}} = shift_left %cst_5, %cst_5 : vector<42xi32> %127 = shift_left %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = shift_left %cst_4, %cst_4 : tensor<42xi32> %128 = shift_left %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = shift_right_signed %arg2, %arg2 : i32 %129 = "std.shift_right_signed"(%i, %i) : (i32, i32) -> i32 // CHECK:%{{[0-9]+}} = shift_right_signed %4, %4 : i32 %130 = shift_right_signed %i2, %i2 : i32 // CHECK: %{{[0-9]+}} = shift_right_signed %arg3, %arg3 : index %131 = shift_right_signed %idx, %idx : index // CHECK: %{{[0-9]+}} = shift_right_signed %cst_5, %cst_5 : vector<42xi32> %132 = shift_right_signed %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = shift_right_signed %cst_4, %cst_4 : 
tensor<42xi32> %133 = shift_right_signed %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = shift_right_unsigned %arg2, %arg2 : i32 %134 = "std.shift_right_unsigned"(%i, %i) : (i32, i32) -> i32 // CHECK:%{{[0-9]+}} = shift_right_unsigned %4, %4 : i32 %135 = shift_right_unsigned %i2, %i2 : i32 // CHECK: %{{[0-9]+}} = shift_right_unsigned %arg3, %arg3 : index %136 = shift_right_unsigned %idx, %idx : index // CHECK: %{{[0-9]+}} = shift_right_unsigned %cst_5, %cst_5 : vector<42xi32> %137 = shift_right_unsigned %vci32, %vci32 : vector<42 x i32> // CHECK: %{{[0-9]+}} = shift_right_unsigned %cst_4, %cst_4 : tensor<42xi32> %138 = shift_right_unsigned %tci32, %tci32 : tensor<42 x i32> // CHECK: %{{[0-9]+}} = sqrt %arg1 : f32 %139 = "std.sqrt"(%f) : (f32) -> f32 // CHECK: %{{[0-9]+}} = sqrt %arg1 : f32 %140 = sqrt %f : f32 // CHECK: %{{[0-9]+}} = sqrt %cst_8 : vector<4xf32> %141 = sqrt %vcf32 : vector<4xf32> // CHECK: %{{[0-9]+}} = sqrt %arg0 : tensor<4x4x?xf32> %142 = sqrt %t : tensor<4x4x?xf32> return } // CHECK-LABEL: func @affine_apply() { func @affine_apply() { %i = "std.constant"() {value = 0: index} : () -> index %j = "std.constant"() {value = 1: index} : () -> index // CHECK: affine.apply #map0(%c0) %a = "affine.apply" (%i) { map = affine_map<(d0) -> (d0 + 1)> } : (index) -> (index) // CHECK: affine.apply #map1()[%c0] %b = affine.apply affine_map<()[x] -> (x+1)>()[%i] return } // CHECK-LABEL: func @load_store_prefetch func @load_store_prefetch(memref<4x4xi32>, index) { ^bb0(%0: memref<4x4xi32>, %1: index): // CHECK: %0 = load %arg0[%arg1, %arg1] : memref<4x4xi32> %2 = "std.load"(%0, %1, %1) : (memref<4x4xi32>, index, index)->i32 // CHECK: %{{.*}} = load %arg0[%arg1, %arg1] : memref<4x4xi32> %3 = load %0[%1, %1] : memref<4x4xi32> // CHECK: prefetch %arg0[%arg1, %arg1], write, locality<1>, data : memref<4x4xi32> prefetch %0[%1, %1], write, locality<1>, data : memref<4x4xi32> // CHECK: prefetch %arg0[%arg1, %arg1], read, locality<3>, instr : memref<4x4xi32> 
prefetch %0[%1, %1], read, locality<3>, instr : memref<4x4xi32> return } // Test with zero-dimensional operands using no index in load/store. // CHECK-LABEL: func @zero_dim_no_idx func @zero_dim_no_idx(%arg0 : memref, %arg1 : memref, %arg2 : memref) { %0 = std.load %arg0[] : memref std.store %0, %arg1[] : memref return // CHECK: %0 = load %{{.*}}[] : memref // CHECK: store %{{.*}}, %{{.*}}[] : memref } // CHECK-LABEL: func @return_op(%arg0: i32) -> i32 { func @return_op(%a : i32) -> i32 { // CHECK: return %arg0 : i32 "std.return" (%a) : (i32)->() } // CHECK-LABEL: func @calls(%arg0: i32) { func @calls(%arg0: i32) { // CHECK: %0 = call @return_op(%arg0) : (i32) -> i32 %x = call @return_op(%arg0) : (i32) -> i32 // CHECK: %1 = call @return_op(%0) : (i32) -> i32 %y = call @return_op(%x) : (i32) -> i32 // CHECK: %2 = call @return_op(%0) : (i32) -> i32 %z = "std.call"(%x) {callee = @return_op} : (i32) -> i32 // CHECK: %f = constant @affine_apply : () -> () %f = constant @affine_apply : () -> () // CHECK: call_indirect %f() : () -> () call_indirect %f() : () -> () // CHECK: %f_0 = constant @return_op : (i32) -> i32 %f_0 = constant @return_op : (i32) -> i32 // CHECK: %3 = call_indirect %f_0(%arg0) : (i32) -> i32 %2 = call_indirect %f_0(%arg0) : (i32) -> i32 // CHECK: %4 = call_indirect %f_0(%arg0) : (i32) -> i32 %3 = "std.call_indirect"(%f_0, %arg0) : ((i32) -> i32, i32) -> i32 return } // CHECK-LABEL: func @extract_element(%arg0: tensor<*xi32>, %arg1: tensor<4x4xf32>) -> i32 { func @extract_element(%arg0: tensor<*xi32>, %arg1 : tensor<4x4xf32>) -> i32 { %c0 = "std.constant"() {value = 0: index} : () -> index // CHECK: %0 = extract_element %arg0[%c0, %c0, %c0, %c0] : tensor<*xi32> %0 = extract_element %arg0[%c0, %c0, %c0, %c0] : tensor<*xi32> // CHECK: %1 = extract_element %arg1[%c0, %c0] : tensor<4x4xf32> %1 = extract_element %arg1[%c0, %c0] : tensor<4x4xf32> return %0 : i32 } // CHECK-LABEL: func @tensor_cast(%arg0 func @tensor_cast(%arg0: tensor<*xf32>, %arg1 : 
tensor<4x4xf32>, %arg2: tensor) { // CHECK: %0 = tensor_cast %arg0 : tensor<*xf32> to tensor %0 = tensor_cast %arg0 : tensor<*xf32> to tensor // CHECK: %1 = tensor_cast %arg1 : tensor<4x4xf32> to tensor<*xf32> %1 = tensor_cast %arg1 : tensor<4x4xf32> to tensor<*xf32> // CHECK: %2 = tensor_cast %arg2 : tensor to tensor<4x?xf32> %2 = tensor_cast %arg2 : tensor to tensor<4x?xf32> // CHECK: %3 = tensor_cast %2 : tensor<4x?xf32> to tensor %3 = tensor_cast %2 : tensor<4x?xf32> to tensor return } // CHECK-LABEL: func @memref_cast(%arg0 func @memref_cast(%arg0: memref<4xf32>, %arg1 : memref, %arg2 : memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]>) { // CHECK: %0 = memref_cast %arg0 : memref<4xf32> to memref %0 = memref_cast %arg0 : memref<4xf32> to memref // CHECK: %1 = memref_cast %arg1 : memref to memref<4xf32> %1 = memref_cast %arg1 : memref to memref<4xf32> // CHECK: {{%.*}} = memref_cast %arg2 : memref<64x16x4xf32, #[[BASE_MAP0]]> to memref<64x16x4xf32, #[[BASE_MAP3]]> %2 = memref_cast %arg2 : memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]> to memref<64x16x4xf32, offset: ?, strides: [?, ?, ?]> // CHECK: {{%.*}} = memref_cast {{%.*}} : memref<64x16x4xf32, #[[BASE_MAP3]]> to memref<64x16x4xf32, #[[BASE_MAP0]]> %3 = memref_cast %2 : memref<64x16x4xf32, offset: ?, strides: [?, ?, ?]> to memref<64x16x4xf32, offset: 0, strides: [64, 4, 1]> // CHECK: memref_cast %{{.*}} : memref<4xf32> to memref<*xf32> %4 = memref_cast %1 : memref<4xf32> to memref<*xf32> // CHECK: memref_cast %{{.*}} : memref<*xf32> to memref<4xf32> %5 = memref_cast %4 : memref<*xf32> to memref<4xf32> return } // CHECK-LABEL: func @memref_view(%arg0 func @memref_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8> // Test two dynamic sizes and dynamic offset. // CHECK: %{{.*}} = std.view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref %1 = view %0[%arg2][%arg0, %arg1] : memref<2048xi8> to memref (d0 * s1 + d1 + s0)>> // Test two dynamic sizes and static offset. 
// CHECK: %{{.*}} = std.view %0[][%arg0, %arg1] : memref<2048xi8> to memref %2 = view %0[][%arg0, %arg1] : memref<2048xi8> to memref (d0 * s0 + d1)>> // Test one dynamic size and dynamic offset. // CHECK: %{{.*}} = std.view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32, #[[VIEW_MAP2]]> %3 = view %0[%arg2][%arg1] : memref<2048xi8> to memref<4x?xf32, affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + d1 + s0)>> // Test one dynamic size and static offset. // CHECK: %{{.*}} = std.view %0[][%arg0] : memref<2048xi8> to memref %4 = view %0[][%arg0] : memref<2048xi8> to memref (d0 * 4 + d1)>> // Test static sizes and static offset. // CHECK: %{{.*}} = std.view %0[][] : memref<2048xi8> to memref<64x4xf32, #[[VIEW_MAP1]]> %5 = view %0[][] : memref<2048xi8> to memref<64x4xf32, affine_map<(d0, d1) -> (d0 * 4 + d1)>> return } // CHECK-LABEL: func @memref_subview(%arg0 func @memref_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %c0 = constant 0 : index %c1 = constant 1 : index %0 = alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> // CHECK: std.subview %0[%c0, %c0, %c0][%arg0, %arg1, %arg2][%c1, %c1, %c1] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref %1 = subview %0[%c0, %c0, %c0][%arg0, %arg1, %arg2][%c1, %c1, %c1] : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to memref (d0 * s1 + d1 * s2 + d2 * s3 + s0)>> %2 = alloc()[%arg2] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> // CHECK: std.subview %2[%c1][%arg0][%c1] : memref<64xf32, #[[BASE_MAP1]]> to memref %3 = subview %2[%c1][%arg0][%c1] : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>> to memref (d0 * s1 + s0)>> %4 = alloc() : memref<64x22xf32, affine_map<(d0, d1) -> (d0 * 22 + d1)>> // CHECK: std.subview %4[%c0, %c1][%arg0, %arg1][%c1, %c0] : memref<64x22xf32, #[[BASE_MAP2]]> to memref %5 = subview %4[%c0, %c1][%arg0, %arg1][%c1, %c0] : memref<64x22xf32, affine_map<(d0, d1) -> (d0 * 22 + d1)>> to memref (d0 * s1 + d1 * s2 + s0)>> // CHECK: std.subview 
%0[][][] : memref<8x16x4xf32, #[[BASE_MAP0]]> to memref<4x4x4xf32, #[[SUBVIEW_MAP3]]> %6 = subview %0[][][] : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to memref<4x4x4xf32, affine_map<(d0, d1, d2) -> (d0 * 16 + d1 * 4 + d2 + 8)>> %7 = alloc(%arg1, %arg2) : memref // CHECK: std.subview {{%.*}}[][][] : memref to memref<4x4xf32, #[[SUBVIEW_MAP4]]> %8 = subview %7[][][] : memref to memref<4x4xf32, offset: ?, strides:[?, ?]> %9 = alloc() : memref<16x4xf32> // CHECK: std.subview {{%.*}}[{{%.*}}, {{%.*}}][][{{%.*}}, {{%.*}}] : memref<16x4xf32> to memref<4x4xf32, #[[SUBVIEW_MAP4]] %10 = subview %9[%arg1, %arg1][][%arg2, %arg2] : memref<16x4xf32> to memref<4x4xf32, offset: ?, strides:[?, ?]> // CHECK: std.subview {{%.*}}[{{%.*}}, {{%.*}}][][] : memref<16x4xf32> to memref<4x4xf32, #[[SUBVIEW_MAP5]] %11 = subview %9[%arg1, %arg2][][] : memref<16x4xf32> to memref<4x4xf32, offset: ?, strides:[8, 2]> return } // CHECK-LABEL: func @test_dimop(%arg0 func @test_dimop(%arg0: tensor<4x4x?xf32>) { // CHECK: %0 = dim %arg0, 2 : tensor<4x4x?xf32> %0 = dim %arg0, 2 : tensor<4x4x?xf32> // use dim as an index to ensure type correctness %1 = affine.apply affine_map<(d0) -> (d0)>(%0) return } // CHECK-LABEL: func @test_splat_op // CHECK-SAME: [[S:%arg[0-9]+]]: f32 func @test_splat_op(%s : f32) { %v = splat %s : vector<8xf32> // CHECK: splat [[S]] : vector<8xf32> %t = splat %s : tensor<8xf32> // CHECK: splat [[S]] : tensor<8xf32> %u = "std.splat"(%s) : (f32) -> vector<4xf32> // CHECK: splat [[S]] : vector<4xf32> return } // CHECK-LABEL: func @tensor_load_store func @tensor_load_store(%0 : memref<4x4xi32>) { // CHECK: %[[TENSOR:.*]] = tensor_load %[[MEMREF:.*]] : memref<4x4xi32> %1 = tensor_load %0 : memref<4x4xi32> // CHECK: tensor_store %[[TENSOR]], %[[MEMREF]] : memref<4x4xi32> tensor_store %1, %0 : memref<4x4xi32> return } +// CHECK-LABEL: func @atomic_rmw +func @atomic_rmw(%I: memref<10xf32>, %val: f32, %i : index) { + // CHECK: %{{.*}} = atomic_rmw "addf" 
%{{.*}}, %{{.*}}[%{{.*}}] + %x = atomic_rmw "addf" %val, %I[%i] : (f32, memref<10xf32>) -> f32 + return +} + // CHECK-LABEL: func @assume_alignment // CHECK-SAME: %[[MEMREF:.*]]: memref<4x4xf16> func @assume_alignment(%0: memref<4x4xf16>) { // CHECK: assume_alignment %[[MEMREF]], 16 : memref<4x4xf16> assume_alignment %0, 16 : memref<4x4xf16> return } diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir index 20f90c76e3d1..7cc0331bd484 100644 --- a/mlir/test/IR/invalid-ops.mlir +++ b/mlir/test/IR/invalid-ops.mlir @@ -1,1056 +1,1080 @@ // RUN: mlir-opt %s -split-input-file -verify-diagnostics func @dim(tensor<1xf32>) { ^bb(%0: tensor<1xf32>): "std.dim"(%0){index = "xyz"} : (tensor<1xf32>)->index // expected-error {{attribute 'index' failed to satisfy constraint: arbitrary integer attribute}} return } // ----- func @dim2(tensor<1xf32>) { ^bb(%0: tensor<1xf32>): "std.dim"(){index = "xyz"} : ()->index // expected-error {{'std.dim' op requires a single operand}} return } // ----- func @dim3(tensor<1xf32>) { ^bb(%0: tensor<1xf32>): "std.dim"(%0){index = 1} : (tensor<1xf32>)->index // expected-error {{'std.dim' op index is out of range}} return } // ----- func @rank(f32) { ^bb(%0: f32): "std.rank"(%0): (f32)->index // expected-error {{'std.rank' op operand #0 must be tensor of any type values}} return } // ----- func @constant() { ^bb: %x = "std.constant"(){value = "xyz"} : () -> i32 // expected-error {{unsupported 'value' attribute}} return } // ----- func @constant_out_of_range() { ^bb: %x = "std.constant"(){value = 100} : () -> i1 // expected-error {{requires attribute's type ('i64') to match op's return type ('i1')}} return } // ----- func @constant_wrong_type() { ^bb: %x = "std.constant"(){value = 10.} : () -> f32 // expected-error {{requires attribute's type ('f64') to match op's return type ('f32')}} return } // ----- func @affine_apply_no_map() { ^bb0: %i = constant 0 : index %x = "affine.apply" (%i) { } : (index) -> (index) // expected-error 
{{'affine.apply' op requires an affine map}} return } // ----- func @affine_apply_wrong_operand_count() { ^bb0: %i = constant 0 : index %x = "affine.apply" (%i) {map = affine_map<(d0, d1) -> ((d0 + 1), (d1 + 2))>} : (index) -> (index) // expected-error {{'affine.apply' op operand count and affine map dimension and symbol count must match}} return } // ----- func @affine_apply_wrong_result_count() { ^bb0: %i = constant 0 : index %j = constant 1 : index %x = "affine.apply" (%i, %j) {map = affine_map<(d0, d1) -> ((d0 + 1), (d1 + 2))>} : (index,index) -> (index) // expected-error {{'affine.apply' op mapping must produce one value}} return } // ----- func @unknown_custom_op() { ^bb0: %i = crazyThing() {value = 0} : () -> index // expected-error {{custom op 'crazyThing' is unknown}} return } // ----- func @unknown_std_op() { // expected-error@+1 {{unregistered operation 'std.foo_bar_op' found in dialect ('std') that does not allow unknown operations}} %0 = "std.foo_bar_op"() : () -> index return } // ----- func @bad_alloc_wrong_dynamic_dim_count() { ^bb0: %0 = constant 7 : index // Test alloc with wrong number of dynamic dimensions. %1 = alloc(%0)[%1] : memref<2x4xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> // expected-error {{op 'std.alloc' dimension operand count does not equal memref dynamic dimension count}} return } // ----- func @bad_alloc_wrong_symbol_count() { ^bb0: %0 = constant 7 : index // Test alloc with wrong number of symbols %1 = alloc(%0) : memref<2x?xf32, affine_map<(d0, d1)[s0] -> ((d0 + s0), d1)>, 1> // expected-error {{operand count does not equal dimension plus symbol operand count}} return } // ----- func @test_store_zero_results() { ^bb0: %0 = alloc() : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> %1 = constant 0 : index %2 = constant 1 : index %3 = load %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> // Test that store returns zero results. 
%4 = store %3, %0[%1, %2] : memref<1024x64xf32, affine_map<(d0, d1) -> (d0, d1)>, 1> // expected-error {{cannot name an operation with no results}} return } // ----- func @test_store_zero_results2(%x: i32, %p: memref) { "std.store"(%x,%p) : (i32, memref) -> i32 // expected-error {{'std.store' op requires zero results}} return } // ----- func @test_alloc_memref_map_rank_mismatch() { ^bb0: %0 = alloc() : memref<1024x64xf32, affine_map<(d0) -> (d0)>, 1> // expected-error {{memref affine map dimension mismatch}} return } // ----- func @intlimit2() { ^bb: %0 = "std.constant"() {value = 0} : () -> i4096 %1 = "std.constant"() {value = 1} : () -> i4097 // expected-error {{integer bitwidth is limited to 4096 bits}} return } // ----- func @calls(%arg0: i32) { %x = call @calls() : () -> i32 // expected-error {{incorrect number of operands for callee}} return } // ----- func @func_with_ops(f32) { ^bb0(%a : f32): %sf = addf %a, %a, %a : f32 // expected-error {{'std.addf' op expected 2 operands}} } // ----- func @func_with_ops(f32) { ^bb0(%a : f32): %sf = addf(%a, %a) : f32 // expected-error {{expected ':'}} } // ----- func @func_with_ops(f32) { ^bb0(%a : f32): %sf = addf{%a, %a} : f32 // expected-error {{expected attribute name}} } // ----- func @func_with_ops(f32) { ^bb0(%a : f32): // expected-error@+1 {{'std.addi' op operand #0 must be integer-like}} %sf = addi %a, %a : f32 } // ----- func @func_with_ops(i32) { ^bb0(%a : i32): %sf = addf %a, %a : i32 // expected-error {{'std.addf' op operand #0 must be floating-point-like}} } // ----- func @func_with_ops(i32) { ^bb0(%a : i32): // expected-error@+1 {{failed to satisfy constraint: allowed 64-bit integer cases: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9}} %r = "std.cmpi"(%a, %a) {predicate = 42} : (i32, i32) -> i1 } // ----- // Comparisons are defined for arguments of the same type. 
func @func_with_ops(i32, i64) { ^bb0(%a : i32, %b : i64): // expected-note {{prior use here}} %r = cmpi "eq", %a, %b : i32 // expected-error {{use of value '%b' expects different type than prior uses}} } // ----- // Comparisons must have the "predicate" attribute. func @func_with_ops(i32, i32) { ^bb0(%a : i32, %b : i32): %r = cmpi %a, %b : i32 // expected-error {{expected non-function type}} } // ----- // Integer comparisons are not recognized for float types. func @func_with_ops(f32, f32) { ^bb0(%a : f32, %b : f32): %r = cmpi "eq", %a, %b : f32 // expected-error {{'lhs' must be integer-like, but got 'f32'}} } // ----- // Result type must be boolean like. func @func_with_ops(i32, i32) { ^bb0(%a : i32, %b : i32): %r = "std.cmpi"(%a, %b) {predicate = 0} : (i32, i32) -> i32 // expected-error {{op result #0 must be bool-like}} } // ----- func @func_with_ops(i32, i32) { ^bb0(%a : i32, %b : i32): // expected-error@+1 {{requires attribute 'predicate'}} %r = "std.cmpi"(%a, %b) {foo = 1} : (i32, i32) -> i1 } // ----- func @func_with_ops() { ^bb0: %c = constant dense<0> : vector<42 x i32> // expected-error@+1 {{op requires the same shape for all operands and results}} %r = "std.cmpi"(%c, %c) {predicate = 0} : (vector<42 x i32>, vector<42 x i32>) -> vector<41 x i1> } // ----- func @func_with_ops(i32, i32, i32) { ^bb0(%cond : i32, %t : i32, %f : i32): // expected-error@+2 {{different type than prior uses}} // expected-note@-2 {{prior use here}} %r = select %cond, %t, %f : i32 } // ----- func @func_with_ops(i32, i32, i32) { ^bb0(%cond : i32, %t : i32, %f : i32): // expected-error@+1 {{op operand #0 must be bool-like}} %r = "std.select"(%cond, %t, %f) : (i32, i32, i32) -> i32 } // ----- func @func_with_ops(i1, i32, i64) { ^bb0(%cond : i1, %t : i32, %f : i64): // expected-error@+1 {{all of {true_value, false_value, result} have same type}} %r = "std.select"(%cond, %t, %f) : (i1, i32, i64) -> i32 } // ----- func @func_with_ops(i1, vector<42xi32>, vector<42xi32>) { ^bb0(%cond : i1, 
%t : vector<42xi32>, %f : vector<42xi32>): // expected-error@+1 {{requires the same shape for all operands and results}} %r = "std.select"(%cond, %t, %f) : (i1, vector<42xi32>, vector<42xi32>) -> vector<42xi32> } // ----- func @func_with_ops(i1, tensor<42xi32>, tensor) { ^bb0(%cond : i1, %t : tensor<42xi32>, %f : tensor): // expected-error@+1 {{ op requires the same shape for all operands and results}} %r = "std.select"(%cond, %t, %f) : (i1, tensor<42xi32>, tensor) -> tensor<42xi32> } // ----- func @invalid_select_shape(%cond : i1, %idx : () -> ()) { // expected-error@+1 {{'result' must be integer-like or floating-point-like, but got '() -> ()'}} %sel = select %cond, %idx, %idx : () -> () // ----- func @invalid_cmp_shape(%idx : () -> ()) { // expected-error@+1 {{'lhs' must be integer-like, but got '() -> ()'}} %cmp = cmpi "eq", %idx, %idx : () -> () // ----- func @dma_no_src_memref(%m : f32, %tag : f32, %c0 : index) { // expected-error@+1 {{expected source to be of memref type}} dma_start %m[%c0], %m[%c0], %c0, %tag[%c0] : f32, f32, f32 } // ----- func @dma_no_dst_memref(%m : f32, %tag : f32, %c0 : index) { %mref = alloc() : memref<8 x f32> // expected-error@+1 {{expected destination to be of memref type}} dma_start %mref[%c0], %m[%c0], %c0, %tag[%c0] : memref<8 x f32>, f32, f32 } // ----- func @dma_no_tag_memref(%tag : f32, %c0 : index) { %mref = alloc() : memref<8 x f32> // expected-error@+1 {{expected tag to be of memref type}} dma_start %mref[%c0], %mref[%c0], %c0, %tag[%c0] : memref<8 x f32>, memref<8 x f32>, f32 } // ----- func @dma_wait_no_tag_memref(%tag : f32, %c0 : index) { // expected-error@+1 {{expected tag to be of memref type}} dma_wait %tag[%c0], %arg0 : f32 } // ----- func @invalid_cmp_attr(%idx : i32) { // expected-error@+1 {{invalid kind of attribute specified}} %cmp = cmpi i1, %idx, %idx : i32 // ----- func @cmpf_generic_invalid_predicate_value(%a : f32) { // expected-error@+1 {{'predicate' attribute value out of range}} %r = "std.cmpf"(%a, %a) 
{predicate = 42} : (f32, f32) -> i1 } // ----- func @cmpf_canonical_invalid_predicate_value(%a : f32) { // expected-error@+1 {{unknown comparison predicate "foo"}} %r = cmpf "foo", %a, %a : f32 } // ----- func @cmpf_canonical_invalid_predicate_value_signed(%a : f32) { // expected-error@+1 {{unknown comparison predicate "sge"}} %r = cmpf "sge", %a, %a : f32 } // ----- func @cmpf_canonical_invalid_predicate_value_no_order(%a : f32) { // expected-error@+1 {{unknown comparison predicate "eq"}} %r = cmpf "eq", %a, %a : f32 } // ----- func @cmpf_canonical_no_predicate_attr(%a : f32, %b : f32) { %r = cmpf %a, %b : f32 // expected-error {{}} } // ----- func @cmpf_generic_no_predicate_attr(%a : f32, %b : f32) { // expected-error@+1 {{requires an integer attribute named 'predicate'}} %r = "std.cmpf"(%a, %b) {foo = 1} : (f32, f32) -> i1 } // ----- func @cmpf_wrong_type(%a : i32, %b : i32) { %r = cmpf "oeq", %a, %b : i32 // expected-error {{operand #0 must be floating-point-like}} } // ----- func @cmpf_generic_wrong_result_type(%a : f32, %b : f32) { // expected-error@+1 {{result #0 must be bool-like}} %r = "std.cmpf"(%a, %b) {predicate = 0} : (f32, f32) -> f32 } // ----- func @cmpf_canonical_wrong_result_type(%a : f32, %b : f32) -> f32 { %r = cmpf "oeq", %a, %b : f32 // expected-note {{prior use here}} // expected-error@+1 {{use of value '%r' expects different type than prior uses}} return %r : f32 } // ----- func @cmpf_result_shape_mismatch(%a : vector<42xf32>) { // expected-error@+1 {{op requires the same shape for all operands and results}} %r = "std.cmpf"(%a, %a) {predicate = 0} : (vector<42 x f32>, vector<42 x f32>) -> vector<41 x i1> } // ----- func @cmpf_operand_shape_mismatch(%a : vector<42xf32>, %b : vector<41xf32>) { // expected-error@+1 {{op requires all operands to have the same type}} %r = "std.cmpf"(%a, %b) {predicate = 0} : (vector<42 x f32>, vector<41 x f32>) -> vector<42 x i1> } // ----- func @cmpf_generic_operand_type_mismatch(%a : f32, %b : f64) { // 
expected-error@+1 {{op requires all operands to have the same type}} %r = "std.cmpf"(%a, %b) {predicate = 0} : (f32, f64) -> i1 } // ----- func @cmpf_canonical_type_mismatch(%a : f32, %b : f64) { // expected-note {{prior use here}} // expected-error@+1 {{use of value '%b' expects different type than prior uses}} %r = cmpf "oeq", %a, %b : f32 } // ----- func @extract_element_no_operands() { // expected-error@+1 {{op expected 1 or more operands}} %0 = "std.extract_element"() : () -> f32 return } // ----- func @extract_element_no_indices(%v : vector<3xf32>) { // expected-error@+1 {{incorrect number of indices for extract_element}} %0 = "std.extract_element"(%v) : (vector<3xf32>) -> f32 return } // ----- func @extract_element_invalid_index_type(%v : vector<3xf32>, %i : i32) { // expected-error@+1 {{operand #1 must be index}} %0 = "std.extract_element"(%v, %i) : (vector<3xf32>, i32) -> f32 return } // ----- func @extract_element_element_result_type_mismatch(%v : vector<3xf32>, %i : index) { // expected-error@+1 {{result type matches element type of aggregate}} %0 = "std.extract_element"(%v, %i) : (vector<3xf32>, index) -> f64 return } // ----- func @extract_element_vector_too_many_indices(%v : vector<3xf32>, %i : index) { // expected-error@+1 {{incorrect number of indices for extract_element}} %0 = "std.extract_element"(%v, %i, %i) : (vector<3xf32>, index, index) -> f32 return } // ----- func @extract_element_tensor_too_many_indices(%t : tensor<2x3xf32>, %i : index) { // expected-error@+1 {{incorrect number of indices for extract_element}} %0 = "std.extract_element"(%t, %i, %i, %i) : (tensor<2x3xf32>, index, index, index) -> f32 return } // ----- func @extract_element_tensor_too_few_indices(%t : tensor<2x3xf32>, %i : index) { // expected-error@+1 {{incorrect number of indices for extract_element}} %0 = "std.extract_element"(%t, %i) : (tensor<2x3xf32>, index) -> f32 return } // ----- func @index_cast_index_to_index(%arg0: index) { // expected-error@+1 {{are cast 
incompatible}} %0 = index_cast %arg0: index to index return } // ----- func @index_cast_float(%arg0: index, %arg1: f32) { // expected-error@+1 {{are cast incompatible}} %0 = index_cast %arg0 : index to f32 return } // ----- func @index_cast_float_to_index(%arg0: f32) { // expected-error@+1 {{are cast incompatible}} %0 = index_cast %arg0 : f32 to index return } // ----- func @sitofp_i32_to_i64(%arg0 : i32) { // expected-error@+1 {{are cast incompatible}} %0 = sitofp %arg0 : i32 to i64 return } // ----- func @sitofp_f32_to_i32(%arg0 : f32) { // expected-error@+1 {{are cast incompatible}} %0 = sitofp %arg0 : f32 to i32 return } // ----- func @fpext_f32_to_f16(%arg0 : f32) { // expected-error@+1 {{are cast incompatible}} %0 = fpext %arg0 : f32 to f16 return } // ----- func @fpext_f16_to_f16(%arg0 : f16) { // expected-error@+1 {{are cast incompatible}} %0 = fpext %arg0 : f16 to f16 return } // ----- func @fpext_i32_to_f32(%arg0 : i32) { // expected-error@+1 {{are cast incompatible}} %0 = fpext %arg0 : i32 to f32 return } // ----- func @fpext_f32_to_i32(%arg0 : f32) { // expected-error@+1 {{are cast incompatible}} %0 = fpext %arg0 : f32 to i32 return } // ----- func @fptrunc_f16_to_f32(%arg0 : f16) { // expected-error@+1 {{are cast incompatible}} %0 = fptrunc %arg0 : f16 to f32 return } // ----- func @fptrunc_f32_to_f32(%arg0 : f32) { // expected-error@+1 {{are cast incompatible}} %0 = fptrunc %arg0 : f32 to f32 return } // ----- func @fptrunc_i32_to_f32(%arg0 : i32) { // expected-error@+1 {{are cast incompatible}} %0 = fptrunc %arg0 : i32 to f32 return } // ----- func @fptrunc_f32_to_i32(%arg0 : f32) { // expected-error@+1 {{are cast incompatible}} %0 = fptrunc %arg0 : f32 to i32 return } // ----- func @sexti_index_as_operand(%arg0 : index) { // expected-error@+1 {{'index' is not a valid operand type}} %0 = sexti %arg0 : index to i128 return } // ----- func @zexti_index_as_operand(%arg0 : index) { // expected-error@+1 {{'index' is not a valid operand type}} %0 = zexti 
%arg0 : index to i128 return } // ----- func @trunci_index_as_operand(%arg0 : index) { // expected-error@+1 {{'index' is not a valid operand type}} %2 = trunci %arg0 : index to i128 return } // ----- func @sexti_index_as_result(%arg0 : i1) { // expected-error@+1 {{'index' is not a valid result type}} %0 = sexti %arg0 : i1 to index return } // ----- func @zexti_index_as_operand(%arg0 : i1) { // expected-error@+1 {{'index' is not a valid result type}} %0 = zexti %arg0 : i1 to index return } // ----- func @trunci_index_as_result(%arg0 : i128) { // expected-error@+1 {{'index' is not a valid result type}} %2 = trunci %arg0 : i128 to index return } // ----- func @sexti_cast_to_narrower(%arg0 : i16) { // expected-error@+1 {{must be wider}} %0 = sexti %arg0 : i16 to i15 return } // ----- func @zexti_cast_to_narrower(%arg0 : i16) { // expected-error@+1 {{must be wider}} %0 = zexti %arg0 : i16 to i15 return } // ----- func @trunci_cast_to_wider(%arg0 : i16) { // expected-error@+1 {{must be wider}} %0 = trunci %arg0 : i16 to i17 return } // ----- func @sexti_cast_to_same_width(%arg0 : i16) { // expected-error@+1 {{must be wider}} %0 = sexti %arg0 : i16 to i16 return } // ----- func @zexti_cast_to_same_width(%arg0 : i16) { // expected-error@+1 {{must be wider}} %0 = zexti %arg0 : i16 to i16 return } // ----- func @trunci_cast_to_same_width(%arg0 : i16) { // expected-error@+1 {{must be wider}} %0 = trunci %arg0 : i16 to i16 return } // ----- func @return_not_in_function() { "foo.region"() ({ // expected-error@+1 {{'std.return' op expects parent op 'func'}} return }): () -> () return } // ----- func @invalid_splat(%v : f32) { splat %v : memref<8xf32> // expected-error@-1 {{must be vector of any type values or statically shaped tensor of any type values}} return } // ----- func @invalid_splat(%v : vector<8xf32>) { %w = splat %v : tensor<8xvector<8xf32>> // expected-error@-1 {{must be integer or float type}} return } // ----- func @invalid_splat(%v : f32) { // expected-note 
{{prior use here}} splat %v : vector<8xf64> // expected-error@-1 {{expects different type than prior uses}} return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8> // expected-error@+1 {{incorrect number of operands for type}} %1 = view %0[][%arg0, %arg1] : memref<2048xi8> to memref (d0 * 4 + d1 + s0)>> return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8> // expected-error@+1 {{is not strided}} %1 = view %0[][%arg0, %arg1] : memref<2048xi8> to memref (d0, d1, s0)>> return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xf32> // expected-error@+1 {{must be 1D memref of 8-bit integer values}} %1 = view %0[][%arg0, %arg1] : memref<2048xf32> to memref (d0 * 4 + d1 + s0)>> return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8, affine_map<(d0) -> (d0 floordiv 8, d0 mod 8)>> // expected-error@+1 {{unsupported map for base memref}} %1 = view %0[][%arg0, %arg1] : memref<2048xi8, affine_map<(d0) -> (d0 floordiv 8, d0 mod 8)>> to memref (d0 * 4 + d1 + s0)>> return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8, 2> // expected-error@+1 {{different memory spaces}} %1 = view %0[][%arg0, %arg1] : memref<2048xi8, 2> to memref (d0 * 4 + d1 + s0)>, 1> return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8> // expected-error@+1 {{incorrect dynamic strides}} %1 = view %0[][%arg0, %arg1] : memref<2048xi8> to memref (d0 * 777 + d1 * 4 + d2)>> return } // ----- func @invalid_view(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<2048xi8> // expected-error@+1 {{incorrect dynamic strides}} %1 = view %0[%arg0][] : memref<2048xi8> to memref<16x4x?xf32, affine_map<(d0, d1, d2) -> (d0 * 777 + d1 * 4 + d2)>> return } // ----- 
func @multiple_offsets(%arg0: index) { %0 = alloc() : memref<2048xi8> // expected-error@+1 {{expects 0 or 1 offset operand}} %1 = view %0[%arg0, %arg0][%arg0] : memref<2048xi8> to memref (d0 * 777 + d1 * 4 + d2)>> return } // ----- func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>, 2> // expected-error@+1 {{different memory spaces}} %1 = subview %0[][%arg2][] : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>, 2> to memref<8x?x4xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * s0 + d1 * 4 + d2)>> return } // ----- func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> // expected-error@+1 {{is not strided}} %1 = subview %0[][%arg2][] : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 4 + d2)>> to memref<8x?x4xf32, affine_map<(d0, d1, d2)[s0] -> (d0 + s0, d1, d2)>> return } // ----- func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 + d1, d1 + d2, d2)>> // expected-error@+1 {{is not strided}} %1 = subview %0[][%arg2][] : memref<8x16x4xf32, affine_map<(d0, d1, d2) -> (d0 + d1, d1 + d2, d2)>> to memref<8x?x4xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * s0 + d1 * 4 + d2)>> return } // ----- func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> // expected-error@+1 {{expected number of dynamic offsets specified to match the rank of the result type}} %1 = subview %0[%arg0, %arg1][%arg2][] : memref<8x16x4xf32> to memref<8x?x4xf32, offset: 0, strides:[?, ?, 4]> return } // ----- func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> // expected-error@+1 {{expected result type to have dynamic strides}} %1 = subview %0[%arg0, %arg1, %arg2][%arg0, %arg1, %arg2][%arg0, 
%arg1, %arg2] : memref<8x16x4xf32> to memref return } // ----- func @invalid_subview(%arg0 : index, %arg1 : index, %arg2 : index) { %0 = alloc() : memref<8x16x4xf32> %c0 = constant 0 : index %c1 = constant 1 : index // expected-error@+1 {{expected result memref layout map to have dynamic offset}} %1 = subview %0[%c0, %c0, %c0][%arg0, %arg1, %arg2][%c1, %c1, %c1] : memref<8x16x4xf32> to memref return } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { // expected-error@+1 {{expected rank of result type to match rank of base type}} %0 = subview %arg1[%arg0, %arg0][][%arg0, %arg0] : memref to memref } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { // expected-error@+1 {{expected number of dynamic offsets specified to match the rank of the result type}} %0 = subview %arg1[%arg0][][] : memref to memref<4x4xf32, offset: ?, strides: [4, 1]> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { // expected-error@+1 {{expected number of dynamic sizes specified to match the rank of the result type}} %0 = subview %arg1[][%arg0][] : memref to memref } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { // expected-error@+1 {{expected number of dynamic strides specified to match the rank of the result type}} %0 = subview %arg1[][][%arg0] : memref to memref } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { // expected-error@+1 {{invalid to specify dynamic sizes when subview result type is statically shaped and viceversa}} %0 = subview %arg1[][%arg0, %arg0][] : memref to memref<4x8xf32, offset: ?, strides: [?, ?]> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { // expected-error@+1 {{invalid to specify dynamic sizes when subview result type is statically shaped and viceversa}} %0 = subview %arg1[][][] : memref to memref } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref<16x4xf32>) { // expected-error@+1 {{expected result memref layout map to have dynamic offset}} %0 = subview 
%arg1[%arg0, %arg0][][] : memref<16x4xf32> to memref<4x2xf32> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref<16x4xf32, offset: ?, strides: [4, 1]>) { // expected-error@+1 {{expected result memref layout map to have dynamic offset}} %0 = subview %arg1[][][] : memref<16x4xf32, offset: ?, strides: [4, 1]> to memref<4x2xf32> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref<16x4xf32, offset: 8, strides:[?, 1]>) { // expected-error@+1 {{expected result memref layout map to have dynamic offset}} %0 = subview %arg1[][][] : memref<16x4xf32, offset: 8, strides:[?, 1]> to memref<4x2xf32> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref<16x4xf32>) { // expected-error@+1 {{expected result type to have dynamic strides}} %0 = subview %arg1[][][%arg0, %arg0] : memref<16x4xf32> to memref<4x2xf32> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref<16x4xf32, offset: 0, strides:[?, ?]>) { // expected-error@+1 {{expected result type to have dynamic stride along a dimension if the base memref type has dynamic stride along that dimension}} %0 = subview %arg1[][][] : memref<16x4xf32, offset: 0, strides:[?, ?]> to memref<4x2xf32, offset:?, strides:[2, 1]> } // ----- func @invalid_subview(%arg0 : index, %arg1 : memref) { %c0 = constant 0 : index %c1 = constant 1 : index // expected-error@+1 {{expected shape of result type to be fully dynamic when sizes are specified}} %0 = subview %arg1[%c0, %c0, %c0][%c1, %arg0, %c1][%c1, %c1, %c1] : memref to memref return } // ----- func @invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) { // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 128 + d1 * 32 + d2 * 2)>>' are cast incompatible}} %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:0, strides:[128, 32, 2]> return } // ----- func 
@invalid_memref_cast(%arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]>) { // expected-error@+1{{operand type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2)>>' and result type 'memref<12x4x16xf32, affine_map<(d0, d1, d2) -> (d0 * 64 + d1 * 16 + d2 + 16)>>' are cast incompatible}} %0 = memref_cast %arg0 : memref<12x4x16xf32, offset:0, strides:[64, 16, 1]> to memref<12x4x16xf32, offset:16, strides:[64, 16, 1]> return } // ----- // incompatible element types func @invalid_memref_cast() { %0 = alloc() : memref<2x5xf32, 0> // expected-error@+1 {{operand type 'memref<2x5xf32>' and result type 'memref<*xi32>' are cast incompatible}} %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xi32> return } // ----- func @invalid_prefetch_rw(%i : index) { %0 = alloc() : memref<10xf32> // expected-error@+1 {{rw specifier has to be 'read' or 'write'}} prefetch %0[%i], rw, locality<0>, data : memref<10xf32> return } // ----- func @invalid_prefetch_cache_type(%i : index) { %0 = alloc() : memref<10xf32> // expected-error@+1 {{cache type has to be 'data' or 'instr'}} prefetch %0[%i], read, locality<0>, false : memref<10xf32> return } // ----- func @invalid_prefetch_locality_hint(%i : index) { %0 = alloc() : memref<10xf32> // expected-error@+1 {{32-bit integer attribute whose minimum value is 0 whose maximum value is 3}} prefetch %0[%i], read, locality<5>, data : memref<10xf32> return } // ----- // incompatible memory space func @invalid_memref_cast() { %0 = alloc() : memref<2x5xf32, 0> // expected-error@+1 {{operand type 'memref<2x5xf32>' and result type 'memref<*xf32>' are cast incompatible}} %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xf32, 1> return } // ----- // unranked to unranked func @invalid_memref_cast() { %0 = alloc() : memref<2x5xf32, 0> %1 = memref_cast %0 : memref<2x5xf32, 0> to memref<*xf32, 0> // expected-error@+1 {{operand type 'memref<*xf32>' and result type 'memref<*xf32>' are cast incompatible}} %2 = memref_cast %1 : 
memref<*xf32, 0> to memref<*xf32, 0> return } // ----- +func @atomic_rmw_idxs_rank_mismatch(%I: memref<16x10xf32>, %i : index, %val : f32) { + // expected-error@+1 {{expects the number of subscripts to be equal to memref rank}} + %x = atomic_rmw "addf" %val, %I[%i] : (f32, memref<16x10xf32>) -> f32 + return +} + +// ----- + +func @atomic_rmw_expects_float(%I: memref<16x10xi32>, %i : index, %val : i32) { + // expected-error@+1 {{expects a floating-point type}} + %x = atomic_rmw "addf" %val, %I[%i, %i] : (i32, memref<16x10xi32>) -> i32 + return +} + +// ----- + +func @atomic_rmw_expects_int(%I: memref<16x10xf32>, %i : index, %val : f32) { + // expected-error@+1 {{expects an integer type}} + %x = atomic_rmw "addi" %val, %I[%i, %i] : (f32, memref<16x10xf32>) -> f32 + return +} + +// ----- + // alignment is not power of 2. func @assume_alignment(%0: memref<4x4xf16>) { // expected-error@+1 {{alignment must be power of 2}} std.assume_alignment %0, 12 : memref<4x4xf16> return } // ----- // 0 alignment value. func @assume_alignment(%0: memref<4x4xf16>) { // expected-error@+1 {{'std.assume_alignment' op attribute 'alignment' failed to satisfy constraint: positive 32-bit integer attribute}} std.assume_alignment %0, 0 : memref<4x4xf16> return }