diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -404,6 +404,11 @@ return IfBuilder(op, *this); } + mlir::Value genNot(mlir::Location loc, mlir::Value boolean) { + return create(loc, mlir::arith::CmpIPredicate::eq, + boolean, createBool(loc, false)); + } + /// Generate code testing \p addr is not a null address. mlir::Value genIsNotNullAddr(mlir::Location loc, mlir::Value addr); diff --git a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp --- a/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/ConvertToFIR.cpp @@ -13,6 +13,7 @@ #include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/MutableBox.h" #include "flang/Optimizer/Builder/Runtime/Assign.h" +#include "flang/Optimizer/Builder/Runtime/Inquiry.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" @@ -29,6 +30,37 @@ using namespace mlir; +static mlir::Value genAllocatableTempFromSourceBox(mlir::Location loc, + fir::FirOpBuilder &builder, + mlir::Value sourceBox) { + assert(sourceBox.getType().isa() && + "must be a base box type"); + // Use the runtime to make a quick and dirty temp with the rhs value. + // Overkill for scalar rhs that could be done in much more clever ways. + // Note that temp descriptor must have the allocatable flag set so that + // the runtime will allocate it with the shape and type parameters of + // the RHS. + // This has the huge benefit of dealing with all cases, including + // polymorphic entities. + mlir::Type fromHeapType = fir::HeapType::get( + fir::unwrapRefType(sourceBox.getType().cast().getEleTy())); + mlir::Type fromBoxHeapType = fir::BoxType::get(fromHeapType); + auto fromMutableBox = builder.createTemporary(loc, fromBoxHeapType); + mlir::Value unallocatedBox = + fir::factory::createUnallocatedBox(builder, loc, fromBoxHeapType, {}); + builder.create(loc, unallocatedBox, fromMutableBox); + fir::runtime::genAssign(builder, loc, fromMutableBox, sourceBox); + mlir::Value copy = builder.create(loc, fromMutableBox); + return copy; +} + +static std::pair +genTempFromSourceBox(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value sourceBox) { + return {genAllocatableTempFromSourceBox(loc, builder, sourceBox), + /*cleanUpTemp=*/true}; +} + namespace { /// May \p lhs alias with \p rhs? /// TODO: implement HLFIR alias analysis. @@ -65,23 +97,9 @@ auto to = fir::getBase(builder.createBox(loc, lhsExv)); auto from = fir::getBase(builder.createBox(loc, rhsExv)); bool cleanUpTemp = false; - mlir::Type fromHeapType = fir::HeapType::get( - fir::unwrapRefType(from.getType().cast().getEleTy())); - if (mayAlias(rhs, lhs)) { - /// Use the runtime to make a quick and dirty temp with the rhs value. - /// Overkill for scalar rhs that could be done in much more clever ways. - /// Note that temp descriptor must have the allocatable flag set so that - /// the runtime will allocate it with the shape and type parameters of - // the RHS. - mlir::Type fromBoxHeapType = fir::BoxType::get(fromHeapType); - auto fromMutableBox = builder.createTemporary(loc, fromBoxHeapType); - mlir::Value unallocatedBox = fir::factory::createUnallocatedBox( - builder, loc, fromBoxHeapType, {}); - builder.create(loc, unallocatedBox, fromMutableBox); - fir::runtime::genAssign(builder, loc, fromMutableBox, from); - cleanUpTemp = true; - from = builder.create(loc, fromMutableBox); - } + if (mayAlias(rhs, lhs)) + std::tie(from, cleanUpTemp) = genTempFromSourceBox(loc, builder, from); + auto toMutableBox = builder.createTemporary(loc, to.getType()); // As per 10.2.1.2 point 1 (1) polymorphic variables must be allocatable. // It is assumed here that they have been reallocated with the dynamic @@ -89,8 +107,7 @@ builder.create(loc, to, toMutableBox); fir::runtime::genAssign(builder, loc, toMutableBox, from); if (cleanUpTemp) { - mlir::Value addr = - builder.create(loc, fromHeapType, from); + mlir::Value addr = builder.create(loc, from); builder.create(loc, addr); } } else { @@ -109,6 +126,122 @@ } }; +class CopyInOpConversion : public mlir::OpRewritePattern { +public: + explicit CopyInOpConversion(mlir::MLIRContext *ctx) : OpRewritePattern{ctx} {} + + struct CopyInResult { + mlir::Value addr; + mlir::Value wasCopied; + }; + + static CopyInResult genNonOptionalCopyIn(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::CopyInOp copyInOp) { + mlir::Value inputVariable = copyInOp.getVar(); + mlir::Type resultAddrType = copyInOp.getCopiedIn().getType(); + mlir::Value isContiguous = + fir::runtime::genIsContiguous(builder, loc, inputVariable); + mlir::Value addr = + builder + .genIfOp(loc, {resultAddrType}, isContiguous, + /*withElseRegion=*/true) + .genThen( + [&]() { builder.create(loc, inputVariable); }) + .genElse([&] { + // Create temporary on the heap. Note that the runtime is used and + // that is desired: since the data copy happens under a runtime + // check (for IsContiguous) the copy loops can hardly provide any + // value to optimizations, instead, the optimizer just wastes + // compilation time on these loops. + mlir::Value temp = + genAllocatableTempFromSourceBox(loc, builder, inputVariable); + // Get rid of allocatable flag in the fir.box. + temp = builder.create(loc, resultAddrType, temp, + /*shape=*/mlir::Value{}, + /*slice=*/mlir::Value{}); + builder.create(loc, temp); + }) + .getResults()[0]; + return {addr, builder.genNot(loc, isContiguous)}; + } + + static CopyInResult genOptionalCopyIn(mlir::Location loc, + fir::FirOpBuilder &builder, + hlfir::CopyInOp copyInOp) { + mlir::Type resultAddrType = copyInOp.getCopiedIn().getType(); + mlir::Value isPresent = copyInOp.getVarIsPresent(); + auto res = + builder + .genIfOp(loc, {resultAddrType, builder.getI1Type()}, isPresent, + /*withElseRegion=*/true) + .genThen([&]() { + CopyInResult res = genNonOptionalCopyIn(loc, builder, copyInOp); + builder.create( + loc, mlir::ValueRange{res.addr, res.wasCopied}); + }) + .genElse([&] { + mlir::Value absent = + builder.create(loc, resultAddrType); + builder.create( + loc, mlir::ValueRange{absent, isPresent}); + }) + .getResults(); + return {res[0], res[1]}; + } + + mlir::LogicalResult + matchAndRewrite(hlfir::CopyInOp copyInOp, + mlir::PatternRewriter &rewriter) const override { + mlir::Location loc = copyInOp.getLoc(); + auto module = copyInOp->getParentOfType(); + fir::FirOpBuilder builder(rewriter, fir::getKindMapping(module)); + CopyInResult result = copyInOp.getVarIsPresent() + ? genOptionalCopyIn(loc, builder, copyInOp) + : genNonOptionalCopyIn(loc, builder, copyInOp); + rewriter.replaceOp(copyInOp, {result.addr, result.wasCopied}); + return mlir::success(); + } +}; + +class CopyOutOpConversion : public mlir::OpRewritePattern { +public: + explicit CopyOutOpConversion(mlir::MLIRContext *ctx) + : OpRewritePattern{ctx} {} + + mlir::LogicalResult + matchAndRewrite(hlfir::CopyOutOp copyOutOp, + mlir::PatternRewriter &rewriter) const override { + mlir::Location loc = copyOutOp.getLoc(); + auto module = copyOutOp->getParentOfType(); + fir::FirOpBuilder builder(rewriter, fir::getKindMapping(module)); + + builder.genIfThen(loc, copyOutOp.getWasCopied()) + .genThen([&]() { + mlir::Value temp = copyOutOp.getTemp(); + if (mlir::Value var = copyOutOp.getVar()) { + auto mutableBox = builder.createTemporary(loc, var.getType()); + builder.create(loc, var, mutableBox); + // Generate Assign() call to copy data from the temporary + // to the variable. Note that in case the actual argument + // is ALLOCATABLE/POINTER the Assign() implementation + // should not engage its reallocation, because the temporary + // is rank, shape and type compatible with it (it was created + // from the variable). + fir::runtime::genAssign(builder, loc, mutableBox, temp); + } + mlir::Type heapType = + fir::HeapType::get(fir::dyn_cast_ptrOrBoxEleTy(temp.getType())); + mlir::Value tempAddr = + builder.create(loc, heapType, temp); + builder.create(loc, tempAddr); + }) + .end(); + rewriter.eraseOp(copyOutOp); + return mlir::success(); + } +}; + class DeclareOpConversion : public mlir::OpRewritePattern { public: explicit DeclareOpConversion(mlir::MLIRContext *ctx) @@ -390,9 +523,9 @@ auto module = this->getOperation(); auto *context = &getContext(); mlir::RewritePatternSet patterns(context); - patterns - .insert(context); + patterns.insert(context); mlir::ConversionTarget target(*context); target.addIllegalDialect(); target.markUnknownOpDynamicallyLegal( @@ -405,6 +538,7 @@ } } }; + } // namespace std::unique_ptr hlfir::createConvertHLFIRtoFIRPass() { diff --git a/flang/test/HLFIR/copy-in-out-codegen.fir b/flang/test/HLFIR/copy-in-out-codegen.fir new file mode 100644 --- /dev/null +++ b/flang/test/HLFIR/copy-in-out-codegen.fir @@ -0,0 +1,96 @@ +// Test hlfir.copy_in and hlfir.copy_out operation codegen + +// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s + +func.func @test_copy_in(%box: !fir.box>) { + %0:2 = hlfir.copy_in %box : (!fir.box>) -> (!fir.box>, i1) + return +} +// CHECK-LABEL: func.func @test_copy_in( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>) { +// CHECK: %[[VAL_1:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_3:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_2]]) : (!fir.box) -> i1 +// CHECK: %[[VAL_4:.*]] = fir.if %[[VAL_3]] -> (!fir.box>) { +// CHECK: fir.result %[[VAL_0]] : !fir.box> +// CHECK: } else { +// CHECK: %[[VAL_5:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_6]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_5]](%[[VAL_7]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: fir.store %[[VAL_8]] to %[[VAL_1]] : !fir.ref>>> +// CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_1]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_15:.*]] = fir.call @_FortranAAssign(%[[VAL_12]], %[[VAL_13]], +// CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_1]] : !fir.ref>>> +// CHECK: %[[VAL_17:.*]] = fir.rebox %[[VAL_16]] : (!fir.box>>) -> !fir.box> +// CHECK: fir.result %[[VAL_17]] : !fir.box> +// CHECK: } +// CHECK: %[[VAL_18:.*]] = arith.constant false +// CHECK: %[[VAL_19:.*]] = arith.cmpi eq, %[[VAL_3]], %[[VAL_18]] : i1 +// CHECK: return +// CHECK: } + +func.func @test_copy_in_optional(%box: !fir.box>, %is_present: i1) { + %0:2 = hlfir.copy_in %box handle_optional %is_present : (!fir.box>, i1) -> (!fir.box>, i1) + return +} +// CHECK-LABEL: func.func @test_copy_in_optional( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i1) { +// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box>> +// CHECK: %[[VAL_3:.*]]:2 = fir.if %[[VAL_1]] -> (!fir.box>, i1) { +// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_5:.*]] = fir.call @_FortranAIsContiguous(%[[VAL_4]]) : (!fir.box) -> i1 +// CHECK: %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.box>) { +// CHECK: fir.result %[[VAL_0]] : !fir.box> +// CHECK: } else { +// CHECK: %[[VAL_7:.*]] = fir.zero_bits !fir.heap> +// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_9:.*]] = fir.shape %[[VAL_8]] : (index) -> !fir.shape<1> +// CHECK: %[[VAL_10:.*]] = fir.embox %[[VAL_7]](%[[VAL_9]]) : (!fir.heap>, !fir.shape<1>) -> !fir.box>> +// CHECK: fir.store %[[VAL_10]] to %[[VAL_2]] : !fir.ref>>> +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_2]] : (!fir.ref>>>) -> !fir.ref> +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_0]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_17:.*]] = fir.call @_FortranAAssign(%[[VAL_14]], %[[VAL_15]], +// CHECK: %[[VAL_18:.*]] = fir.load %[[VAL_2]] : !fir.ref>>> +// CHECK: %[[VAL_19:.*]] = fir.rebox %[[VAL_18]] : (!fir.box>>) -> !fir.box> +// CHECK: fir.result %[[VAL_19]] : !fir.box> +// CHECK: } +// CHECK: %[[VAL_20:.*]] = arith.constant false +// CHECK: %[[VAL_21:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_20]] : i1 +// CHECK: fir.result %[[VAL_22:.*]], %[[VAL_21]] : !fir.box>, i1 +// CHECK: } else { +// CHECK: %[[VAL_23:.*]] = fir.absent !fir.box> +// CHECK: fir.result %[[VAL_23]], %[[VAL_1]] : !fir.box>, i1 +// CHECK: } + +func.func @test_copy_out_no_copy_back(%temp: !fir.box>, %was_copied: i1) { + hlfir.copy_out %temp, %was_copied : (!fir.box>, i1) -> () + return +} +// CHECK-LABEL: func.func @test_copy_out_no_copy_back( +// CHECK-SAME: %[[VAL_0:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: i1) { +// CHECK-NEXT: fir.if %[[VAL_1]] { +// CHECK-NEXT: %[[VAL_2:.*]] = fir.box_addr %[[VAL_0]] : (!fir.box>) -> !fir.heap> +// CHECK-NEXT: fir.freemem %[[VAL_2]] : !fir.heap> +// CHECK-NEXT: } + +func.func @test_copy_out_copy_back(%box: !fir.box>, %temp: !fir.box>, %was_copied: i1) { + hlfir.copy_out %temp, %was_copied to %box : (!fir.box>, i1, !fir.box>) -> () + return +} +// CHECK-LABEL: func.func @test_copy_out_copy_back( +// CHECK-SAME: %[[VAL_0:[^:]*]]: !fir.box>, +// CHECK-SAME: %[[VAL_1:.*]]: !fir.box>, +// CHECK-SAME: %[[VAL_2:.*]]: i1) { +// CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box> +// CHECK: fir.if %[[VAL_2]] { +// CHECK: fir.store %[[VAL_0]] to %[[VAL_3]] : !fir.ref>> +// CHECK: %[[VAL_7:.*]] = fir.convert %[[VAL_3]] : (!fir.ref>>) -> !fir.ref> +// CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_1]] : (!fir.box>) -> !fir.box +// CHECK: %[[VAL_10:.*]] = fir.call @_FortranAAssign(%[[VAL_7]], %[[VAL_8]], +// CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.heap> +// CHECK: fir.freemem %[[VAL_11]] : !fir.heap> +// CHECK: }