diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -20,6 +20,7 @@ class Operation; class Pass; class Region; +class ModuleOp; } // namespace mlir namespace fir { @@ -72,7 +73,8 @@ std::unique_ptr createAlgebraicSimplificationPass(const mlir::GreedyRewriteConfig &config); std::unique_ptr createPolymorphicOpConversionPass(); - +std::unique_ptr> +createOMPEarlyOutliningPass(); // declarative passes #define GEN_PASS_REGISTRATION #include "flang/Optimizer/Transforms/Passes.h.inc" diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -298,4 +298,17 @@ let dependentDialects = [ "fir::FIROpsDialect" ]; } +def OMPEarlyOutliningPass + : Pass<"omp-early-target-outlining", "mlir::ModuleOp"> { + let summary = "Outlines all target ops into separate functions"; + let description = [{ + This pass outlines all omp.target operations into individual functions. + It is invoked in the front end after the initial FIR has been constructed. + This pass is only needed when compiling for the target device to prevent + the optimizer from performing transforms across target region boundaries. 
+ }]; + let constructor = "::fir::createOMPEarlyOutliningPass()"; + let dependentDialects = ["mlir::omp::OpenMPDialect"]; +} + #endif // FLANG_OPTIMIZER_TRANSFORMS_PASSES diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -23,6 +23,7 @@ #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Support/Utils.h" +#include "flang/Optimizer/Transforms/Passes.h" #include "flang/Parser/dump-parse-tree.h" #include "flang/Parser/parsing.h" #include "flang/Parser/provenance.h" @@ -299,6 +300,20 @@ // run the default passes. mlir::PassManager pm((*mlirModule)->getName(), mlir::OpPassManager::Nesting::Implicit); + // Add OpenMP-related passes + // WARNING: These passes must be run immediately after the lowering to ensure + // that the FIR is correct with respect to OpenMP operations/attributes. + if (ci.getInvocation().getFrontendOpts().features.IsEnabled( + Fortran::common::LanguageFeature::OpenMP)) { + bool isDevice = false; + if (auto offloadMod = llvm::dyn_cast( + mlirModule->getOperation())) + isDevice = offloadMod.getIsTargetDevice(); + + if (isDevice) + pm.addPass(fir::createOMPEarlyOutliningPass()); + } + pm.enableVerifier(/*verifyPasses=*/true); pm.addPass(std::make_unique()); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -4251,6 +4251,10 @@ void eraseDeadCodeAndBlocks(mlir::RewriterBase &rewriter, llvm::MutableArrayRef regions) { + // WARNING: Do not add passes that can do folding or code motion here + // because they might cross omp.target region boundaries, which can result + // in incorrect code. Optimization passes like these must be added after + // OMP early outlining has been done. 
(void)mlir::eraseUnreachableBlocks(rewriter, regions); (void)mlir::runRegionDCE(rewriter, regions); } diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -16,6 +16,7 @@ AddDebugFoundation.cpp PolymorphicOpConversion.cpp LoopVersioning.cpp + OMPEarlyOutlining.cpp DEPENDS FIRDialect diff --git a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp --- a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp +++ b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp @@ -44,6 +44,20 @@ return result.second.name; } +/// Update the early outlining parent name +void updateEarlyOutliningParentName(mlir::func::FuncOp funcOp, + bool appendUnderscore) { + if (auto earlyOutlineOp = llvm::dyn_cast( + funcOp.getOperation())) { + auto oldName = earlyOutlineOp.getParentName(); + if (oldName != "") { + auto dName = fir::NameUniquer::deconstruct(oldName); + std::string newName = mangleExternalName(dName, appendUnderscore); + earlyOutlineOp.setParentName(newName); + } + } +} + //===----------------------------------------------------------------------===// // Rewrite patterns //===----------------------------------------------------------------------===// @@ -76,6 +90,7 @@ mlir::SymbolTable::setSymbolName(op, newSymbol); } + updateEarlyOutliningParentName(op, appendUnderscore); rewriter.finalizeRootUpdate(op); return ret; } diff --git a/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp new file mode 100644 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/OMPEarlyOutlining.cpp @@ -0,0 +1,122 @@ +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/Support/InternalNames.h" +#include 
"flang/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/Dialect/OpenMP/OpenMPDialect.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" + +namespace fir { +#define GEN_PASS_DEF_OMPEARLYOUTLININGPASS +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +namespace { +class OMPEarlyOutliningPass + : public fir::impl::OMPEarlyOutliningPassBase { + + std::string getOutlinedFnName(llvm::StringRef parentName, unsigned count) { + return std::string(parentName) + "_omp_outline_" + std::to_string(count); + } + + mlir::func::FuncOp outlineTargetOp(mlir::OpBuilder &builder, + mlir::omp::TargetOp &targetOp, + mlir::func::FuncOp &parentFunc, + unsigned count) { + // Collect inputs + llvm::SetVector inputs; + for (auto operand : targetOp.getOperation()->getOperands()) + inputs.insert(operand); + + mlir::Region &targetRegion = targetOp.getRegion(); + mlir::getUsedValuesDefinedAbove(targetRegion, inputs); + + // Create new function and initialize + mlir::FunctionType funcType = builder.getFunctionType( + mlir::TypeRange(inputs.getArrayRef()), mlir::TypeRange()); + std::string parentName(parentFunc.getName()); + std::string funcName = getOutlinedFnName(parentName, count); + auto loc = targetOp.getLoc(); + mlir::func::FuncOp newFunc = + mlir::func::FuncOp::create(loc, funcName, funcType); + mlir::Block *entryBlock = newFunc.addEntryBlock(); + builder.setInsertionPointToStart(entryBlock); + mlir::ValueRange newInputs = entryBlock->getArguments(); + + // Set the declare target information, the outlined function + // is always a host function. 
+ if (auto parentDTOp = llvm::dyn_cast( + parentFunc.getOperation())) + if (auto newDTOp = llvm::dyn_cast( + newFunc.getOperation())) + newDTOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::host, + parentDTOp.getDeclareTargetCaptureClause()); + + // Set the early outlining interface parent name + if (auto earlyOutlineOp = + llvm::dyn_cast( + newFunc.getOperation())) + earlyOutlineOp.setParentName(parentName); + + // Create input map from inputs to function parameters. + mlir::IRMapping valueMap; + for (auto InArg : llvm::zip(inputs, newInputs)) + valueMap.map(std::get<0>(InArg), std::get<1>(InArg)); + + // Clone the target op into the new function + builder.clone(*(targetOp.getOperation()), valueMap); + + // Create return op + builder.create(loc); + + return newFunc; + } + + void outlineTargetOps(mlir::OpBuilder &builder, + mlir::func::FuncOp &functionOp, + mlir::ModuleOp &moduleOp, + llvm::SmallVectorImpl &newFuncs) { + unsigned count = 0; + for (auto TargetOp : functionOp.getOps()) { + mlir::func::FuncOp outlinedFunc = + outlineTargetOp(builder, TargetOp, functionOp, count); + newFuncs.push_back(outlinedFunc); + count++; + } + } + + void runOnOperation() override { + mlir::ModuleOp moduleOp = getOperation(); + mlir::MLIRContext *context = &getContext(); + mlir::OpBuilder builder(context); + llvm::SmallVector newFuncs; + + for (auto functionOp : + llvm::make_early_inc_range(moduleOp.getOps())) { + outlineTargetOps(builder, functionOp, moduleOp, newFuncs); + functionOp.erase(); + } + + for (auto newFunc : newFuncs) + moduleOp.push_back(newFunc); + } +}; + +} // namespace + +namespace fir { +std::unique_ptr> +createOMPEarlyOutliningPass() { + return std::make_unique(); +} +} // namespace fir diff --git a/flang/test/Driver/omp-cse-region-boundary.f90 b/flang/test/Driver/omp-cse-region-boundary.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Driver/omp-cse-region-boundary.f90 @@ -0,0 +1,26 @@ +!This test checks that when compiling an OpenMP program for 
the target device +!CSE is not done across target op region boundaries. It also checks that when +!compiling for the host CSE is done. +!RUN: %flang_fc1 -fopenmp-is-device -emit-mlir -fopenmp %s -o - | fir-opt -cse | FileCheck %s -check-prefix=CHECK-DEVICE +!RUN: %flang_fc1 -emit-mlir -fopenmp %s -o - | fir-opt -cse | FileCheck %s -check-prefix=CHECK-HOST + +!Constant should be present inside target region. +!CHECK-DEVICE: omp.target +!CHECK-DEVICE: arith.constant 10 +!CHECK-DEVICE: omp.terminator + +!Constant should not be present inside target region. +!CHECK-HOST: omp.target +!CHECK-HOST-NOT: arith.constant 10 +!CHECK-HOST: omp.terminator + +subroutine writeIndex(sum) + integer :: sum + integer :: myconst1 + integer :: myconst2 + myconst1 = 10 +!$omp target map(from:new_len) + myconst2 = 10 +!$omp end target + sum = myconst2 + myconst2 +end subroutine writeIndex diff --git a/flang/test/Fir/external-mangling.fir b/flang/test/Fir/external-mangling.fir --- a/flang/test/Fir/external-mangling.fir +++ b/flang/test/Fir/external-mangling.fir @@ -89,3 +89,12 @@ // LLVMIR-NOUNDER: llvm.call @callee() : () -> () // LLVMIR-NOUNDER: llvm.call @callee() : () -> () + +// ----- + +func.func @_QPwriteindex_omp_outline_0() attributes {omp.outline_parent_name = "_QPwriteindex"} { + return +} + +// CHECK-UNDER: attributes {omp.outline_parent_name = "writeindex_"} +// CHECK-NOUNDER: attributes {omp.outline_parent_name = "writeindex"} diff --git a/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 b/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenMP/omp-target-early-outlining.f90 @@ -0,0 +1,33 @@ +!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s +!RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-fir -fopenmp -fopenmp-is-device %s -o - | FileCheck %s + +!CHECK: func.func @_QPwrite_index_omp_outline_0(%[[ARG0:.*]]: !fir.ref) attributes 
{omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPwrite_index"} { +!CHECK-NEXT: omp.target {{.*}} { +!CHECK: %[[CONSTANT_VALUE_10:.*]] = arith.constant 10 : i32 +!CHECK: fir.store %[[CONSTANT_VALUE_10]] to %[[ARG0]] : !fir.ref +!CHECK: omp.terminator +!CHECK-NEXT: } +!CHECK-NEXT: return + +!CHECK: func.func @_QPwrite_index_omp_outline_1(%[[ARG1:.*]]: !fir.ref) attributes {omp.declare_target = #omp.declaretarget, omp.outline_parent_name = "_QPwrite_index"} { +!CHECK-NEXT: omp.target {{.*}} { +!CHECK: %[[CONSTANT_VALUE_20:.*]] = arith.constant 20 : i32 +!CHECK: fir.store %[[CONSTANT_VALUE_20]] to %[[ARG1]] : !fir.ref +!CHECK: omp.terminator +!CHECK-NEXT: } +!CHECK-NEXT: return + + +SUBROUTINE WRITE_INDEX(INT_ARRAY) + INTEGER :: INT_ARRAY(*) + INTEGER :: NEW_LEN +!$omp target map(from:new_len) + NEW_LEN = 10 +!$omp end target +!$omp target map(from:new_len) + NEW_LEN = 20 +!$omp end target + do INDEX_ = 1, NEW_LEN + INT_ARRAY(INDEX_) = INDEX_ + end do +end subroutine WRITE_INDEX diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h @@ -36,6 +36,11 @@ : public DeclareTargetInterface::ExternalModel, T> {}; +template +struct EarlyOutliningDefaultModel + : public EarlyOutliningInterface::ExternalModel< + EarlyOutliningDefaultModel, T> {}; + } // namespace mlir::omp #endif // MLIR_DIALECT_OPENMP_OPENMPINTERFACES_H_ diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -262,4 +262,44 @@ ]; } +def EarlyOutliningInterface : OpInterface<"EarlyOutliningInterface"> { + let description = [{ + FuncOps that are a result of early outlining should have this interface. 
+ }]; + + let cppNamespace = "::mlir::omp"; + + let methods = [ + InterfaceMethod< + /*description=*/[{ + Set a StringAttr on an outlined target region function containing the + name of the parent function where the target region was outlined + from. The parent name is used to construct the kernel names for target + regions. + }], + /*retTy=*/"void", + /*methodName=*/"setParentName", + (ins "std::string":$parentName), [{}], [{ + $_op->setAttr( + mlir::StringAttr::get($_op->getContext(), + llvm::Twine{"omp.outline_parent_name"}), + mlir::StringAttr::get($_op->getContext(), parentName)); + }]>, + + InterfaceMethod< + /*description=*/[{ + Returns the name of the parent function from which the target op was + outlined. If it doesn't exist it returns an empty string. + }], + /*retTy=*/"llvm::StringRef", + /*methodName=*/"getParentName", + (ins), [{}], [{ + if (Attribute parentName = $_op->getAttr("omp.outline_parent_name")) + if (::llvm::isa(parentName)) + return ::llvm::dyn_cast(parentName).getValue(); + return {}; + }]> + ]; +} + #endif // OpenMP_OPS_INTERFACES diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -89,6 +89,13 @@ *getContext()); mlir::func::FuncOp::attachInterface< mlir::omp::DeclareTargetDefaultModel>(*getContext()); + + // Attach default early outlining interface to func ops. 
+ mlir::func::FuncOp::attachInterface< + mlir::omp::EarlyOutliningDefaultModel>(*getContext()); + mlir::LLVM::LLVMFuncOp::attachInterface< + mlir::omp::EarlyOutliningDefaultModel>( + *getContext()); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1644,6 +1644,14 @@ llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); StringRef parentName = opInst.getParentOfType().getName(); + + // Override parent name if early outlining function + if (auto earlyOutlineOp = llvm::dyn_cast( + opInst.getParentOfType().getOperation())) { + llvm::StringRef outlineParentName = earlyOutlineOp.getParentName(); + parentName = outlineParentName.empty() ? parentName : outlineParentName; + } + llvm::TargetRegionEntryInfo entryInfo; if (!getTargetEntryUniqueInfo(entryInfo, targetOp, parentName)) diff --git a/mlir/test/Dialect/OpenMP/attr.mlir b/mlir/test/Dialect/OpenMP/attr.mlir --- a/mlir/test/Dialect/OpenMP/attr.mlir +++ b/mlir/test/Dialect/OpenMP/attr.mlir @@ -136,3 +136,11 @@ %0 = llvm.mlir.constant(1 : i32) : i32 llvm.return %0 : i32 } + +// ----- + +// CHECK-LABEL: func @_QPwriteindex_omp_outline_0 +// CHECK-SAME: {{.*}} attributes {omp.outline_parent_name = "QPwriteindex"} { +func.func @_QPwriteindex_omp_outline_0() attributes {omp.outline_parent_name = "QPwriteindex"} { + return +} diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm-target-device.mlir @@ -0,0 +1,18 @@ +// This test checks that the name of the generated kernel function is using the +// name stored in the 
omp.outline_parent_name attribute. +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +module attributes {omp.is_device = true} { + llvm.func @writeindex_omp_outline_0_(%arg0: !llvm.ptr, %arg1: !llvm.ptr) attributes {omp.outline_parent_name = "writeindex_"} { + omp.target map((from -> %arg0 : !llvm.ptr), (implicit -> %arg1: !llvm.ptr)) { + %0 = llvm.mlir.constant(20 : i32) : i32 + %1 = llvm.mlir.constant(10 : i32) : i32 + llvm.store %1, %arg0 : !llvm.ptr + llvm.store %0, %arg1 : !llvm.ptr + omp.terminator + } + llvm.return + } +} + +// CHECK: define {{.*}} void @__omp_offloading_{{.*}}_{{.*}}_writeindex__l7(ptr {{.*}}, ptr {{.*}}) {