diff --git a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
--- a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
+++ b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp
@@ -29,18 +29,133 @@
     return std::string(parentName) + "_omp_outline_" + std::to_string(count);
   }
 
+  // Returns the index of `value` within the results of `op`. If `value` is
+  // not one of `op`'s results (e.g. it is a block argument), 0 is returned
+  // as a fallback.
+  unsigned getResultIndex(mlir::Value value, mlir::Operation *op) {
+    if (auto result = mlir::dyn_cast<mlir::OpResult>(value))
+      if (result.getOwner() == op)
+        return result.getResultNumber();
+    return 0;
+  }
+
+  // Returns true when `op` is a fir::AddrOfOp whose symbol resolves to a
+  // fir::GlobalOp for which DeclareTargetInterface::isDeclareTarget() holds.
+  // A null `op`, an unresolved symbol, or a non-global symbol yield false.
+  bool isDeclareTargetOp(mlir::Operation *op) {
+    auto addressOfOp = mlir::dyn_cast_if_present<fir::AddrOfOp>(op);
+    if (!addressOfOp)
+      return false;
+
+    // lookupSymbol returns null when the symbol cannot be resolved, and a
+    // plain dyn_cast asserts on null, so use the null-tolerant cast.
+    auto gOp = mlir::dyn_cast_if_present<fir::GlobalOp>(
+        addressOfOp->getParentOfType<mlir::ModuleOp>().lookupSymbol(
+            addressOfOp.getSymbol()));
+    if (!gOp)
+      return false;
+
+    auto declareTargetGlobal =
+        llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(gOp.getOperation());
+    return declareTargetGlobal && declareTargetGlobal.isDeclareTarget();
+  }
+
+  // Recursively clones `op`, together with the operations defining its
+  // operands, at the builder's insertion point and returns the clone.
+  // Operands that have no defining operation (block arguments) are remapped
+  // to the entry of `newInputs` paired with their position in `inputs`, when
+  // present. Should be extendable where required, perhaps via operation
+  // specialisation/overloading, if something needs specialised handling.
+  // NOTE: Results in duplication of some values that would otherwise be
+  // a single SSA value shared between operations, this is tidied up on
+  // lowering to some extent.
+  mlir::Operation *
+  cloneArgAndChildren(mlir::OpBuilder &builder, mlir::Operation *op,
+                      llvm::SetVector<mlir::Value> &inputs,
+                      mlir::Block::BlockArgListType &newInputs) {
+    mlir::IRMapping valueMap;
+    for (mlir::Value opValue : op->getOperands()) {
+      if (mlir::Operation *defOp = opValue.getDefiningOp()) {
+        unsigned resIdx = getResultIndex(opValue, defOp);
+        valueMap.map(opValue,
+                     cloneArgAndChildren(builder, defOp, inputs, newInputs)
+                         ->getResult(resIdx));
+      } else {
+        for (auto inArg : llvm::zip(inputs, newInputs)) {
+          if (opValue == std::get<0>(inArg))
+            valueMap.map(opValue, std::get<1>(inArg));
+        }
+      }
+    }
+
+    return builder.clone(*op, valueMap);
+  }
+
+  // Makes `varPtr` (the varPtr or varPtrPtr operand of a map entry) available
+  // in the outlined function and records the replacement in `mapInfoMap`:
+  //  - a fir::BoxAddrOp is cloned along with its operand chain;
+  //  - a declare target fir::AddrOfOp is cloned on its own;
+  //  - anything else is mapped to its matching entry in `newInputs`, if any.
+  // For the two cloning cases the replacement is recorded in `valueMap` too.
+  void cloneMapOpVariables(mlir::OpBuilder &builder, mlir::IRMapping &valueMap,
+                           mlir::IRMapping &mapInfoMap,
+                           llvm::SetVector<mlir::Value> &inputs,
+                           mlir::Block::BlockArgListType &newInputs,
+                           mlir::Value varPtr) {
+    if (fir::BoxAddrOp boxAddrOp =
+            mlir::dyn_cast_if_present<fir::BoxAddrOp>(varPtr.getDefiningOp())) {
+      mlir::Value newV =
+          cloneArgAndChildren(builder, boxAddrOp, inputs, newInputs)
+              ->getResult(0);
+      mapInfoMap.map(varPtr, newV);
+      valueMap.map(boxAddrOp, newV);
+      return;
+    }
+
+    if (varPtr.getDefiningOp() && isDeclareTargetOp(varPtr.getDefiningOp())) {
+      // isDeclareTargetOp only succeeds for a fir::AddrOfOp, so the cast
+      // cannot fail here.
+      auto addrOp = mlir::cast<fir::AddrOfOp>(varPtr.getDefiningOp());
+      mlir::Value newV = builder.clone(*addrOp)->getResult(0);
+      mapInfoMap.map(varPtr, newV);
+      valueMap.map(addrOp, newV);
+      return;
+    }
+
+    for (auto inArg : llvm::zip(inputs, newInputs)) {
+      if (varPtr == std::get<0>(inArg))
+        mapInfoMap.map(varPtr, std::get<1>(inArg));
+    }
+  }
+
   mlir::func::FuncOp outlineTargetOp(mlir::OpBuilder &builder,
                                      mlir::omp::TargetOp &targetOp,
                                      mlir::func::FuncOp &parentFunc,
                                      unsigned count) {
+    // NOTE: once implicit captures are handled appropriately in the initial
+    // PFT lowering if it is possible, we can remove the usage of
+    // getUsedValuesDefinedAbove and instead just iterate over the target op's
+    // operands (or just the map arguments) and perhaps refactor this function
+    // a little.
     // Collect inputs
     llvm::SetVector<mlir::Value> inputs;
-    for (auto operand : targetOp.getOperation()->getOperands())
-      inputs.insert(operand);
-
     mlir::Region &targetRegion = targetOp.getRegion();
     mlir::getUsedValuesDefinedAbove(targetRegion, inputs);
 
+    // Filter out declare target and map entries, which are specially handled
+    // at the moment, so we do not wish these to end up as function arguments
+    // which would just be more noise in the IR. Erase through the iterator:
+    // removing elements inside a range-based for over the same SetVector
+    // would invalidate the iteration and skip entries.
+    for (auto iter = inputs.begin(); iter != inputs.end();) {
+      mlir::Operation *defOp = iter->getDefiningOp();
+      if (defOp && (mlir::isa<mlir::omp::MapInfoOp>(defOp) ||
+                    isDeclareTargetOp(defOp)))
+        iter = inputs.erase(iter);
+      else
+        ++iter;
+    }
+
     // Create new function and initialize
     mlir::FunctionType funcType = builder.getFunctionType(
         mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange());
@@ -51,7 +166,7 @@
         mlir::func::FuncOp::create(loc, funcName, funcType);
     mlir::Block *entryBlock = newFunc.addEntryBlock();
     builder.setInsertionPointToStart(entryBlock);
-    mlir::ValueRange newInputs = entryBlock->getArguments();
+    mlir::Block::BlockArgListType newInputs = entryBlock->getArguments();
 
     // Set the declare target information, the outlined function
     // is always a host function.
@@ -68,10 +183,50 @@
             newFunc.getOperation()))
       earlyOutlineOp.setParentName(parentName);
 
-    // Create input map from inputs to function parameters.
+    // The value map for the newly generated Target Operation, we must
+    // remap most of the input.
     mlir::IRMapping valueMap;
-    for (auto InArg : llvm::zip(inputs, newInputs))
-      valueMap.map(std::get<0>(InArg), std::get<1>(InArg));
+
+    // Special handling for map, declare target and regular map variables
+    // are handled slightly differently for the moment, declare target has
+    // its addressOfOp cloned over, whereas we skip it for the regular map
+    // variables. We need knowledge of which global is linked to the map
+    // operation for declare target, whereas we aren't bothered for the
+    // regular map variables for the moment. We could treat both the same,
+    // however, cloning across the minimum for the moment to avoid
+    // optimisations breaking segments of the lowering seems prudent as this
+    // was the original intent of the pass.
+    for (mlir::Value oper : targetOp.getOperation()->getOperands()) {
+      // An operand may be a block argument with no defining operation, so use
+      // the null-tolerant cast.
+      if (auto mapEntry = mlir::dyn_cast_if_present<mlir::omp::MapInfoOp>(
+              oper.getDefiningOp())) {
+        mlir::IRMapping mapInfoMap;
+        for (mlir::Value bound : mapEntry.getBounds()) {
+          if (auto mapEntryBound =
+                  mlir::dyn_cast_if_present<mlir::omp::DataBoundsOp>(
+                      bound.getDefiningOp())) {
+            mapInfoMap.map(bound, cloneArgAndChildren(builder, mapEntryBound,
+                                                      inputs, newInputs)
+                                      ->getResult(0));
+          }
+        }
+
+        cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
+                            mapEntry.getVarPtr());
+
+        if (mapEntry.getVarPtrPtr())
+          cloneMapOpVariables(builder, valueMap, mapInfoMap, inputs, newInputs,
+                              mapEntry.getVarPtrPtr());
+
+        valueMap.map(
+            mapEntry,
+            builder.clone(*mapEntry.getOperation(), mapInfoMap)->getResult(0));
+      }
+    }
+
+    for (auto inArg : llvm::zip(inputs, newInputs))
+      valueMap.map(std::get<0>(inArg), std::get<1>(inArg));
 
     // Clone the target op into the new function
     builder.clone(*(targetOp.getOperation()), valueMap);