diff --git a/mlir/include/mlir/IR/OpCluster.h b/mlir/include/mlir/IR/OpCluster.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/IR/OpCluster.h
@@ -0,0 +1,264 @@
+//===- OpCluster.h - MLIR Clustering Class ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Take a series of topologically sorted operations in a Block and cluster
+// them together, hoisting them into the region of a single Op which sits at a
+// higher level in the IR hierarchy.
+//
+// NOTE(review): this copy of the patch has lost every `<...>` span (template
+// parameter lists, template arguments, system header names). The code below
+// is reproduced as extracted; restore those spans from the original change
+// before building.
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_IR_CLUSTER_H
+#define MLIR_IR_CLUSTER_H
+
+#include "mlir/IR/AsmState.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/Diagnostics.h"
+#include "mlir/IR/Dialect.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/OpConnectivity.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Support/LogicalResult.h"
+#include "mlir/Support/MlirOptMain.h"
+#include "mlir/Support/ToolUtilities.h"
+
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+#include
+
+using namespace std;
+using namespace mlir;
+
+namespace mlir {
+
+#define TYPICAL_NO_INPUTS 4
+#define TYPICAL_NO_OUTPUTS 4
+
+/* Looks up `arguments_origin` for an entry whose block_or_op, external_block,
+   external_op and external_block_op_index all equal the given keys.
+   Returns that entry's block_index, or -1 when no entry matches. */
+int findBlockArgumentIndex(bool block_or_op, Block *external_block,
+                           Operation *external_op,
+                           unsigned int external_block_op_index,
+                           std::vector &arguments_origin);
+
+/* Builds the operand mapping that is handed to OpBuilder::clone() when
+   `op_old` is cloned into the cluster block `block_instruction`. Operands
+   produced outside `ops` are mapped to arguments of `block_instruction`
+   (located through `arguments_origin`); operands produced inside `ops` are
+   mapped to results of the clones recorded in `cache_subgraph`. */
+mlir::BlockAndValueMapping
+calculate_input_list(Operation *op_old, std::vector &ops,
+                     Block *block_instruction,
+                     std::vector &arguments_origin,
+                     std::map &cache_subgraph);
+
+/* Moves the operations in `ops` into region 0 of a newly created cluster
+   operation and returns that operation.
+
+   ops              operations to cluster; each one is cloned into the new
+                    region and then erased from its original block.
+   insertClusterOp  position inside `block` where the cluster op is created.
+   block            block that receives the cluster op.
+   context          context used to construct the OpBuilder.
+
+   Steps, in order: collect the values entering the cluster, collect the
+   values leaving it, create the cluster op and its entry block, clone the
+   ops, create the terminator, rewire external consumers to the cluster
+   results, erase the original ops.
+
+   NOTE(review): the op types passed to the two op_builder.create() calls are
+   presumably the template parameters of this function; they are missing from
+   this copy - confirm against the original patch. */
+template
+Operation *Cluster(std::vector &ops,
+                   llvm::iplist::iterator insertClusterOp,
+                   Block &block, mlir::MLIRContext *context) {
+  Operation *clusterOp;
+
+  /* Cluster Inputs: every value consumed by an op in `ops` that is either a
+     block argument or the result of an op outside `ops`. */
+  SmallVector cluster_input_list;
+  SmallVector cluster_input_list_type;
+  std::vector input_arguments_origin;
+  for (Operation *op : ops) {
+    std::vector producers = ExtractProducers(op);
+    for (auto producer : producers) {
+      if (producer.is_producer_block_or_operation() ==
+          producer_type::block_op) {
+        if (findBlockArgumentIndex(true, producer.block_external_operation,
+                                   nullptr, producer.external_slot_index,
+                                   input_arguments_origin) <
+            0) /* (block, argument index) not registered yet; this avoids
+                  registering the same value more than once */
+        {
+          cluster_input_list.push_back(producer.value);
+          ArgumentInfo bao;
+          bao.block_or_op = true;
+          bao.external_block = producer.block_external_operation;
+          bao.external_op = nullptr;
+          bao.external_block_op_index = producer.external_slot_index;
+          /* position of this value among the cluster block arguments */
+          bao.block_index = input_arguments_origin.size();
+          input_arguments_origin.push_back(bao);
+        }
+      } else if (std::find(ops.begin(), ops.end(),
+                           producer.external_operation) == ops.end()) {
+        if (findBlockArgumentIndex(false, nullptr, producer.external_operation,
+                                   producer.external_slot_index,
+                                   input_arguments_origin) <
+            0) /* (defining op, result slot) not registered yet; this avoids
+                  registering the same value more than once */
+        {
+          cluster_input_list.push_back(producer.value);
+          ArgumentInfo bao;
+          bao.block_or_op = false;
+          bao.external_block = nullptr;
+          bao.external_op = producer.external_operation;
+          bao.external_block_op_index = producer.external_slot_index;
+          /* position of this value among the cluster block arguments */
+          bao.block_index = input_arguments_origin.size();
+          input_arguments_origin.push_back(bao);
+        }
+      }
+    }
+  }
+  for (auto input : cluster_input_list)
+    cluster_input_list_type.push_back(input.getType());
+
+  /* Cluster Outputs: every result of an op in `ops` that has at least one
+     consumer outside `ops`, registered once per (op, result slot). */
+  SmallVector cluster_output_list;
+  std::vector output_arguments_origin;
+  for (Operation *op : ops) {
+    std::vector consumers = ExtractConsumers(op);
+    for (auto connection : consumers) {
+      if (std::find(ops.begin(), ops.end(), connection.external_operation) ==
+          ops.end()) {
+        if (findBlockArgumentIndex(false, nullptr, connection.operation,
+                                   connection.operation_slot_index,
+                                   output_arguments_origin) <
+            0) /* (op, result slot) not registered yet; this avoids
+                  registering the same value more than once */
+        {
+          cluster_output_list.push_back(connection.value);
+          ArgumentInfo bao;
+          bao.block_or_op = false;
+          bao.external_block = nullptr;
+          bao.external_op = connection.operation;
+          bao.external_block_op_index = connection.operation_slot_index;
+          bao.block_index = output_arguments_origin.size();
+          output_arguments_origin.push_back(bao);
+        }
+      }
+    }
+  }
+
+  /*
+    Create a cluster instruction
+    Add the Cluster Instruction to a specific place in the Basic Block
+  */
+  OpBuilder op_builder(context);
+  mlir::Location loc = op_builder.getUnknownLoc();
+
+  /*
+    Make sure the NPU inputs and outputs are the correct type, forcing them to
+    Ranked Tensor where needed. Casts are inserted lower in the code.
+    NOTE(review): the type tested by the dyn_cast calls below is missing from
+    this copy. The checks only fire through assert(), so they are compiled
+    out when NDEBUG is defined.
+  */
+  SmallVector cluster_output_list_type;
+  for (auto output : cluster_output_list) {
+    if (output.getType().dyn_cast()) {
+      auto type_original = output.getType().dyn_cast();
+      mlir::RankedTensorType rt = RankedTensorType::get(
+          type_original.getShape(), type_original.getElementType());
+      cluster_output_list_type.push_back(rt);
+    } else {
+      assert(false && "cluster.cc: While Clustering a Npu Region the input is "
+                      "not a known type");
+    }
+  }
+
+  for (auto v : cluster_input_list)
+    if (not(v.getType().dyn_cast()))
+      assert(false && "cluster.cc: While Clustering a Npu Region the input is "
+                      "not a known type");
+
+  op_builder.setInsertionPoint(&block, insertClusterOp);
+  clusterOp = op_builder.create(loc, cluster_output_list_type,
+                                cluster_input_list);
+
+  /* The entry block of region 0 receives one argument per cluster input.
+     NOTE(review): "®ion_instruction" looks like a mis-decoded
+     "&region_instruction" (HTML entity &reg) - confirm and restore. */
+  Region ®ion_instruction = clusterOp->getRegion(0);
+  Block *block_instruction = op_builder.createBlock(
+      ®ion_instruction, region_instruction.begin(), cluster_input_list_type);
+
+  /*
+    2. Deep Clone operations into cluster new Basic Block
+  */
+
+  llvm::iplist::iterator insertClonedOp(block_instruction->end());
+  op_builder.setInsertionPoint(block_instruction, insertClonedOp);
+
+  SmallVector terminate_input_list;
+  /* maps each original op to its clone inside the cluster block */
+  std::map cache_subgraph;
+
+  // Clone the Ops
+  for (auto op_old : ops) {
+    /*
+      Create Operand Mapping
+    */
+    mlir::BlockAndValueMapping operand_mapping = calculate_input_list(
+        op_old, ops, block_instruction, input_arguments_origin, cache_subgraph);
+
+    /*
+      Clone Op
+    */
+    Operation *op_new = op_builder.clone(*op_old, operand_mapping);
+
+    /*
+      If its outputs are going outside of the subgraph, or it is the last Op
+      of the subgraph, they must sink into the Yield Op
+      (terminate_input_list)
+      0. A Result value from op_new could not be going outside the Subgraph.
+         [do not add]
+      1. A Result value from op_new could be going outside the Subgraph
+         only [Okay we add it once]
+      2. A Result value from op_new could be going outside the Subgraph
+         and inside the subgraph [Okay we add it once]
+      3. A Result value from op_new could be going outside the Subgraph to
+         N separate nodes [Make sure we do not add multiple times]
+    */
+    for (unsigned int i = 0; i < op_old->getNumResults(); i++) {
+      mlir::Value v = op_old->getResult(i);
+      /* Is this Result ever consumed outside the Subgraph? The break makes
+         sure the result is yielded only once. */
+      for (mlir::Operation *user_op : v.getUsers()) {
+        if (std::find(ops.begin(), ops.end(), user_op) == ops.end()) {
+          terminate_input_list.push_back(op_new->getResult(i));
+          break;
+        }
+      }
+    }
+
+    /*
+      Store in Cache
+    */
+    cache_subgraph[op_old] = op_new;
+  }
+
+  // Insert a Yield Command
+  /* Generating Clusters with no output is not a use case at the moment so
+   * leaving this enabled */
+  assert(terminate_input_list.size() != 0 &&
+         "cluster_ops.cc::terminate_input_list.size() == 0");
+  /* op_terminate is not read again in this function */
+  Operation *op_terminate =
+      op_builder.create(loc, terminate_input_list);
+
+  /*
+    3. Update the connections (consumers) that used to connect to
+       the operations that moved into clusterOp
+       Connect clusterOp to its consumers
+       This will automatically also update the final scheduleir.yield
+       Needs to be done after cloning of Ops so we can detect the
+       terminate_input_list
+       Result k of clusterOp replaces the k-th entry of cluster_output_list.
+       NOTE(review): this relies on cluster_output_list and
+       terminate_input_list being in the same order - confirm.
+  */
+  unsigned int index = 0;
+  for (auto v : cluster_output_list)
+    v.replaceAllUsesWith(clusterOp->getResult(index++));
+
+  /* 4. Delete the Operations from original block: They all have been cloned
+        into the new instruction region and are not needed to be referenced
+        any more. Also consumers have been updated.
+  */
+  for (auto op_old : ops) {
+    op_old->dropAllReferences();
+    op_old->dropAllDefinedValueUses();
+    op_old->erase(); /* Remove this operation from its parent block and delete
+                        it */
+  }
+
+  return clusterOp;
+}
+
+} // namespace mlir
+
+#endif // MLIR_IR_CLUSTER_H
diff --git a/mlir/include/mlir/IR/OpConnectivity.h b/mlir/include/mlir/IR/OpConnectivity.h
new file mode 100644
--- /dev/null
+++ b/mlir/include/mlir/IR/OpConnectivity.h
@@ -0,0 +1,138 @@
+//===- OpConnectivity.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Utility functions to get the producer and consumer operations of a given
+// operation
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_IR_CONNECTIVITY_H
+#define MLIR_IR_CONNECTIVITY_H
+
+#include "mlir/Dialect/Quant/QuantOps.h"
+#include "mlir/Dialect/Quant/QuantTypes.h"
+#include "mlir/IR/AffineMap.h" // from @llvm-project
+#include "mlir/IR/Attributes.h" // from @llvm-project
+#include "mlir/IR/BlockAndValueMapping.h" // from @llvm-project
+#include "mlir/IR/Builders.h" // from @llvm-project
+#include "mlir/IR/Location.h" // from @llvm-project
+#include "mlir/IR/MLIRContext.h" // from @llvm-project
+#include "mlir/IR/Operation.h" // from @llvm-project
+#include "mlir/IR/PatternMatch.h" // from @llvm-project
+#include "mlir/Interfaces/InferTypeOpInterface.h" // from @llvm-project
+#include "mlir/Pass/Pass.h" // from @llvm-project
+#include "mlir/Transforms/DialectConversion.h" // from @llvm-project
+
+#include
+#include +#include +#include + +using namespace std; + +namespace mlir { + +/* + 0. Consumer/Producer/Block block index + 1. Operation block index + + 2. Consumer/Producer/Block slot index + 3. Operation slot index + + 4. Operation consuming/generating the value + 5. Consumer/Producer operation generating/consuming Value + nullptr if Value is generated by a BlockAttribute + 6. Block generating Value + nullptr if value is produced by an Op + + 7. Value can be a OpResult or a Operand + 8. direction of connection +*/ + +enum connection_direction { operation_2_consumer, producer_2_operation }; + +enum producer_type { block_op, operation_op }; + +struct ArgumentInfo { + bool block_or_op; + Block *external_block; + Operation *external_op; + unsigned int external_block_op_index; + unsigned int block_index; +}; + +class connection { +public: + connection() {} + + connection(unsigned external_slot_index, unsigned operation_slot_index, + Operation *operation, Operation *external_operation, + Block *block_external_operation, Value value, + connection_direction direction) { + this->external_slot_index = external_slot_index; + this->operation_slot_index = operation_slot_index; + this->operation = operation; + this->external_operation = external_operation; + this->block_external_operation = block_external_operation; + this->value = value; + this->direction = direction; + } + + unsigned external_slot_index; + unsigned operation_slot_index; + + Operation *operation; + Operation *external_operation; + Block *block_external_operation; + + Value value; + + connection_direction direction; + + producer_type is_producer_block_or_operation() { + if (this->external_operation) + return operation_op; + else if (this->block_external_operation) + return block_op; + } +}; + +std::vector ExtractConsumers(Value v); +std::vector ExtractConsumers(Operation *i); +std::vector ExtractProducers(Operation *i); + +std::list ExtractUniqueConsumers_Operations(Operation *i); +std::list 
ExtractUniqueProducers_Operations(Operation *i); + +template +inline std::vector ExtractConsumers_ofType(Operation *i) { + std::vector conn = ExtractConsumers(i); + std::vector conn_filtered; + for (auto c : conn) { + if (llvm::isa(c.external_operation)) + conn_filtered.push_back(c); + } + return conn_filtered; +} + +template +inline std::vector ExtractProducers_ofType(Operation *i) { + std::vector prod = ExtractProducers(i); + std::vector prod_filtered; + for (auto p : prod) { + if (p.is_producer_block_or_operation() == operation_op) { + if (llvm::isa(p.external_operation)) + prod_filtered.push_back(p); + } + } + return prod_filtered; +} + +SmallVector arrayref_2_smallvector(ArrayRef s); + +} // namespace mlir + +#endif // MLIR_IR_CONNECTIVITY_H diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt --- a/mlir/lib/IR/CMakeLists.txt +++ b/mlir/lib/IR/CMakeLists.txt @@ -31,6 +31,8 @@ Value.cpp Verifier.cpp Visitors.cpp + OpCluster.cpp + OpConnectivity.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/IR diff --git a/mlir/lib/IR/OpCluster.cpp b/mlir/lib/IR/OpCluster.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/IR/OpCluster.cpp @@ -0,0 +1,89 @@ +//===- OpCluster.cpp - MLIR Clustering Class ----------------------*- C++ +//-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Take a Series of topologically sorted Operators in the Block and cluster +// them together performing hoisting and clustering into a specific Op which +// is at a higher level in the IR hierarchy +//===----------------------------------------------------------------------===// + +#include "mlir/IR/OpCluster.h" + +namespace mlir { + +int findBlockArgumentIndex(bool block_or_op, Block *external_block, + Operation *external_op, + unsigned int external_block_op_index, + std::vector &arguments_origin) { + for (auto &bao : arguments_origin) { + if ((bao.block_or_op == block_or_op) and + (bao.external_block == external_block) and + (bao.external_op == external_op) and + (bao.external_block_op_index == external_block_op_index)) + return bao.block_index; + } + return -1; +} + +mlir::BlockAndValueMapping calculate_input_list( + Operation *op_old, std::vector &subgraph_vertices, + Block *block_instruction, std::vector &arguments_origin, + std::map &cache_subgraph) { + /* For the op_old which is ging to get cloned each value needs to be replaced + * by a value from the new_ops */ + /* A new Op can either be connected to the outside world through the + * BlockArguments or to a already created Op */ + + mlir::BlockAndValueMapping operand_mapping; + if (op_old->getNumOperands() == 0) + return operand_mapping; + + std::vector producers = ExtractProducers(op_old); + for (auto producer : producers) { + Operation *producer_op = producer.external_operation; + if (producer.is_producer_block_or_operation() == producer_type::block_op) { + /* + ** Value is Coming from a BasicBlock Argument outside the Subgraph + through the cluster block arguments We need to find which block + argument index to use + */ + auto index = findBlockArgumentIndex( + true, producer.block_external_operation, nullptr, + producer.external_slot_index, 
arguments_origin); + if (index == -1) + assert(false && + "cluster::calculate_input_list()::Block Index not found"); + operand_mapping.map(op_old->getOperand(producer.operation_slot_index), + block_instruction->getArgument(index)); + } else if (not(std::find(subgraph_vertices.begin(), subgraph_vertices.end(), + producer_op) != subgraph_vertices.end())) { + /* + ** Value is Coming from an Op outside the Subgraph through the cluster + block arguments We need to find which block argument to use + */ + auto index = findBlockArgumentIndex( + false, nullptr, producer.external_operation, + producer.external_slot_index, arguments_origin); + if (index == -1) + assert(false && + "cluster::calculate_input_list()::Block Index not found"); + operand_mapping.map(op_old->getOperand(producer.operation_slot_index), + block_instruction->getArgument(index)); + } else if (std::find(subgraph_vertices.begin(), subgraph_vertices.end(), + producer_op) != subgraph_vertices.end()) { + /* + ** Value is Coming from an Op within the Subgraph + Use the Cache Subgraph to find the Op + */ + operand_mapping.map(op_old->getOperand(producer.operation_slot_index), + cache_subgraph[producer_op]->getResult(0)); + } + } + return operand_mapping; +} + +} // namespace mlir diff --git a/mlir/lib/IR/OpConnectivity.cpp b/mlir/lib/IR/OpConnectivity.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/IR/OpConnectivity.cpp @@ -0,0 +1,149 @@ +//===- OpConnectivity.cpp ---------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Utility functions to get the prodcuers and consumer operations of a given +// operation +//===----------------------------------------------------------------------===// + +#include "mlir/IR/OpConnectivity.h" + +namespace mlir { + +/* + Returns Detailed Information of the Consumers of an Value + If a Value goes to the same Operation more than once this is + treated as multiple individual connections + from the value to the operation +*/ +std::vector ExtractConsumers(Value v) { + std::vector consumers; + std::set unique_consumers; + for (Operation *consumer : v.getUsers()) { + unique_consumers.insert(consumer); + } + for (Operation *consumer : unique_consumers) { + unsigned external_slot_index; + for (int ope = 0; ope < consumer->getNumOperands(); ope++) { + Value v1 = consumer->getOperand(ope); + if (v1 == v) { + connection oper_val(ope, -1, nullptr, consumer, nullptr, v, + operation_2_consumer); + consumers.push_back(oper_val); + } + } + } + return consumers; +} + +/* + Returns Detailed Information of the Consumers of an Operation + If a Value goes to the same Operation more than once this is + treated as multiple individual connections + from the operation to the operation +*/ +std::vector ExtractConsumers(Operation *i) { + std::vector consumers; + unsigned no_results = i->getNumResults(); + for (unsigned int r = 0; r < no_results; r++) { + OpResult result = i->getResult(r); + std::set unique_consumers; + for (Operation *consumer : result.getUsers()) { + unique_consumers.insert(consumer); + } + for (Operation *consumer : unique_consumers) { + for (int ope = 0; ope < consumer->getNumOperands(); ope++) { + Value v1 = consumer->getOperand(ope); + if (v1 == result) { + connection oper_val(ope, r, i, consumer, nullptr, result, + operation_2_consumer); + consumers.push_back(oper_val); + } + } + } + } + return consumers; 
+} + +/* + Returns Detailed Information of the Producers of an Operation + If a Value comes from the same Operation and more than once this is + treated as multiple individual connections + from the operation to the operation +*/ +std::vector ExtractProducers(Operation *i) { + std::vector producers; + for (unsigned int p = 0; p < i->getNumOperands(); p++) { + Value input = i->getOperand(p); + Operation *op_con = input.getDefiningOp(); + if (op_con != nullptr) { + /* Producer is from an Op */ + OpResult result = input.cast(); + Operation *producer = result.getOwner(); + unsigned external_slot_index = 9999999; + for (int ope = 0; ope < producer->getNumResults(); ope++) { + Value v1 = producer->getResult(ope); + if (v1 == result) { + external_slot_index = ope; + break; + } + } + assert(external_slot_index != 9999999 && + "ExtractProducers::Operation Operand Index not found"); + unsigned operation_slot_index = p; + connection oper_val(external_slot_index, operation_slot_index, i, + producer, nullptr, input, producer_2_operation); + producers.push_back(oper_val); + } else { + BlockArgument block_argument = input.cast(); + Block *producer = block_argument.getOwner(); + unsigned external_slot_index = 9999999; + for (int ope = 0; ope < producer->getNumArguments(); ope++) { + BlockArgument v1 = producer->getArgument(ope); + if (v1 == block_argument) { + external_slot_index = ope; + break; + } + } + assert(external_slot_index != 9999999 && + "ExtractProducers::Block Operand Index not found"); + unsigned operation_slot_index = p; + connection oper_val(external_slot_index, operation_slot_index, i, nullptr, + producer, input, producer_2_operation); + producers.push_back(oper_val); + } + } + return producers; +} + +std::list ExtractUniqueConsumers_Operations(Operation *i) { + std::vector conn = ExtractConsumers(i); + std::list conn_filtered; + for (auto c : conn) { + conn_filtered.push_back(c.external_operation); + } + return conn_filtered; +} + +std::list 
ExtractUniqueProducers_Operations(Operation *i) { + std::vector prod = ExtractProducers(i); + std::list prod_filtered; + for (auto p : prod) { + if (p.external_operation) { + prod_filtered.push_back(p.external_operation); + } + } + return prod_filtered; +} + +SmallVector arrayref_2_smallvector(ArrayRef s) { + SmallVector values; + for (int i = 0; i < s.size(); i++) + values.push_back(s[i]); + return values; +} + +} // namespace mlir