diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -539,7 +539,7 @@ fir::getIntIfConstant(boundsOp.getExtent())) { shape.push_back(*fir::getIntIfConstant(boundsOp.getExtent())); } else { - return ty; // TODO: handle dynamic shaped array slice. + shape.push_back(fir::SequenceType::getUnknownExtent()); } } if (shape.empty() || shape.size() != bounds.size()) @@ -680,24 +680,32 @@ llvm_unreachable("OpenACC reduction unsupported type"); } +/// Check if the DataBoundsOp is a constant bound (lb and ub are constants or +/// extent is a constant). +bool isConstantBound(mlir::acc::DataBoundsOp &op) { + if (op.getLowerbound() && fir::getIntIfConstant(op.getLowerbound()) && + op.getUpperbound() && fir::getIntIfConstant(op.getUpperbound())) + return true; + if (op.getExtent() && fir::getIntIfConstant(op.getExtent())) + return true; + return false; +} + /// Determine if the bounds represent a dynamic shape. bool hasDynamicShape(llvm::SmallVector &bounds) { if (bounds.empty()) return false; for (auto b : bounds) { auto op = mlir::dyn_cast(b.getDefiningOp()); - if (((op.getLowerbound() && !fir::getIntIfConstant(op.getLowerbound())) || - (op.getUpperbound() && !fir::getIntIfConstant(op.getUpperbound()))) && - op.getExtent() && !fir::getIntIfConstant(op.getExtent())) + if (!isConstantBound(op)) return true; } return false; } -static mlir::Value -genReductionInitValue(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Type ty, mlir::acc::ReductionOperator op, - llvm::SmallVector &bounds) { +static mlir::Value genScalarReductionValue(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type ty, + mlir::acc::ReductionOperator op) { if (op == mlir::acc::ReductionOperator::AccLand || op == mlir::acc::ReductionOperator::AccLor || op == mlir::acc::ReductionOperator::AccEqv || @@ -736,27 +744,74 @@ return fir::factory::Complex{builder, loc}.createComplex( cmplxTy.getFKind(), realInit, 
imagInit); } + llvm::report_fatal_error("Unsupported OpenACC reduction type"); +} + +static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type ty, + mlir::acc::ReductionOperator op, + llvm::SmallVector &bounds) { if (auto refTy = mlir::dyn_cast(ty)) { if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { - mlir::Type vecTy = - mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy()); - auto shTy = vecTy.cast(); - if (seqTy.getEleTy().isIntOrIndex()) - return builder.create( - loc, vecTy, - mlir::DenseElementsAttr::get( - shTy, - getReductionInitValue(op, seqTy.getEleTy()))); - if (mlir::isa(seqTy.getEleTy())) - return builder.create( - loc, vecTy, - mlir::DenseElementsAttr::get( - shTy, - getReductionInitValue(op, seqTy.getEleTy()))); + if (seqTy.hasDynamicExtents()) { + mlir::Block *block = builder.getBlock(); + assert(seqTy.getDimension() == (block->getNumArguments() - 1) && + "expect dynamic extents passed as block arguments"); + llvm::SmallVector extents; + for (unsigned i = 1; i < block->getNumArguments(); ++i) + extents.push_back(block->getArgument(i)); + mlir::Value temp = builder.createTemporary( + loc, seqTy, ".reduction_init", extents, {}, {}); + mlir::Value shapeOp = builder.create(loc, extents); + mlir::Value arrLoad = builder.create( + loc, seqTy, temp, shapeOp, /*slice=*/mlir::Value{}, std::nullopt); + + mlir::Type idxTy = builder.getIndexType(); + llvm::SmallVector loops; + llvm::SmallVector ivs; + mlir::Value innerArg; + for (auto ext : llvm::reverse(extents)) { + mlir::Value lb = builder.createIntegerConstant(loc, idxTy, 0); + mlir::Value c1 = builder.createIntegerConstant(loc, idxTy, 1); + mlir::Value ub = builder.create(loc, ext, c1); + auto loop = builder.create(loc, lb, ub, c1, + /*unordered=*/false, + /*finalCount=*/false, + mlir::ValueRange{arrLoad}); + builder.setInsertionPointToStart(loop.getBody()); + loops.push_back(loop); + ivs.push_back(loop.getInductionVar()); + innerArg = 
loop.getRegionIterArgs().front(); + } + mlir::Value initValue = + genScalarReductionValue(builder, loc, seqTy.getEleTy(), op); + mlir::Value arrUp = builder.create(loc, seqTy, + innerArg, initValue, ivs, mlir::ValueRange()); + builder.create(loc, arrUp); + builder.setInsertionPointAfter(loops[0]); + builder.create(loc, arrLoad, loops[0].getResult(0), temp, mlir::Value(), mlir::ValueRange()); + return temp; + } else { + mlir::Type vecTy = + mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy()); + auto shTy = vecTy.cast(); + if (seqTy.getEleTy().isIntOrIndex()) + return builder.create( + loc, vecTy, + mlir::DenseElementsAttr::get( + shTy, + getReductionInitValue(op, seqTy.getEleTy()))); + if (mlir::isa(seqTy.getEleTy())) + return builder.create( + loc, vecTy, + mlir::DenseElementsAttr::get( + shTy, + getReductionInitValue(op, seqTy.getEleTy()))); + } } } - llvm::report_fatal_error("Unsupported OpenACC reduction type"); + return genScalarReductionValue(builder, loc, ty, op); } template @@ -789,35 +844,68 @@ // Handle combiner on arrays. 
if (auto refTy = mlir::dyn_cast(ty)) { if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { - if (seqTy.hasDynamicExtents()) - TODO(loc, "OpenACC reduction on array with dynamic extents"); - mlir::Type idxTy = builder.getIndexType(); - mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); - - llvm::SmallVector loops; - llvm::SmallVector ivs; - for (auto ext : llvm::reverse(seqTy.getShape())) { - auto lb = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, 0)); - auto ub = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, ext - 1)); - auto step = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, 1)); - auto loop = builder.create(loc, lb, ub, step, - /*unordered=*/false); - builder.setInsertionPointToStart(loop.getBody()); - loops.push_back(loop); - ivs.push_back(loop.getInductionVar()); + if (seqTy.hasDynamicExtents()) { + mlir::Block *block = builder.getBlock(); + assert(seqTy.getDimension() == (block->getNumArguments() - 2) && + "expect dynamic extents passed as block arguments"); + llvm::SmallVector extents; + for (unsigned i = 2; i < block->getNumArguments(); ++i) + extents.push_back(block->getArgument(i)); + mlir::Type idxTy = builder.getIndexType(); + mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); + + llvm::SmallVector loops; + llvm::SmallVector ivs; + for (auto ext : llvm::reverse(extents)) { + auto lb = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, 0)); + auto c1 = builder.createIntegerConstant(loc, idxTy, 1); + auto ub = builder.create(loc, ext, c1); + auto loop = builder.create(loc, lb, ub, c1, + /*unordered=*/false); + builder.setInsertionPointToStart(loop.getBody()); + loops.push_back(loop); + ivs.push_back(loop.getInductionVar()); + } + auto addr1 = builder.create(loc, refTy, value1, ivs); + auto addr2 = builder.create(loc, refTy, value2, ivs); + auto load1 = builder.create(loc, addr1); + auto load2 = builder.create(loc, addr2); + auto combined = + genCombiner(builder, loc, op, 
seqTy.getEleTy(), load1, load2); + builder.create(loc, combined, addr1); + builder.setInsertionPointAfter(loops[0]); + + return value1; + } else { + mlir::Type idxTy = builder.getIndexType(); + mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); + + llvm::SmallVector loops; + llvm::SmallVector ivs; + for (auto ext : llvm::reverse(seqTy.getShape())) { + auto lb = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, 0)); + auto ub = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, ext - 1)); + auto step = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, 1)); + auto loop = builder.create(loc, lb, ub, step, + /*unordered=*/false); + builder.setInsertionPointToStart(loop.getBody()); + loops.push_back(loop); + ivs.push_back(loop.getInductionVar()); + } + auto addr1 = builder.create(loc, refTy, value1, ivs); + auto addr2 = builder.create(loc, refTy, value2, ivs); + auto load1 = builder.create(loc, addr1); + auto load2 = builder.create(loc, addr2); + auto combined = + genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2); + builder.create(loc, combined, addr1); + builder.setInsertionPointAfter(loops[0]); + return value1; } - auto addr1 = builder.create(loc, refTy, value1, ivs); - auto addr2 = builder.create(loc, refTy, value2, ivs); - auto load1 = builder.create(loc, addr1); - auto load2 = builder.create(loc, addr2); - auto combined = - genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2); - builder.create(loc, combined, addr1); - builder.setInsertionPointAfter(loops[0]); - return value1; } } @@ -887,14 +975,28 @@ mlir::OpBuilder modBuilder(mod.getBodyRegion()); auto recipe = modBuilder.create(loc, recipeName, ty, op); + + llvm::SmallVector argTypes; + llvm::SmallVector argLocs; + argTypes.push_back(ty); + argLocs.push_back(loc); + // Each bound adds one index block argument carrying a dynamic extent. 
+ if (hasDynamicShape(bounds)) { + for (unsigned i = 0; i < bounds.size(); ++i) { + argTypes.push_back(builder.getIndexType()); + argLocs.push_back(loc); + } + } 
builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(), - {ty}, {loc}); + argTypes, argLocs); builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); mlir::Value initValue = genReductionInitValue(builder, loc, ty, op, bounds); builder.create(loc, initValue); + argTypes.insert(argTypes.begin(), ty); + argLocs.insert(argLocs.begin(), loc); builder.createBlock(&recipe.getCombinerRegion(), - recipe.getCombinerRegion().end(), {ty, ty}, {loc, loc}); + recipe.getCombinerRegion().end(), argTypes, argLocs); builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); mlir::Value v1 = recipe.getCombinerRegion().front().getArgument(0); mlir::Value v2 = recipe.getCombinerRegion().front().getArgument(1); @@ -924,9 +1026,6 @@ converter, builder, semanticsContext, stmtCtx, accObject, operandLocation, asFortran, bounds); - if (hasDynamicShape(bounds)) - TODO(operandLocation, "OpenACC reductions with dynamic shaped array"); - mlir::Type reductionTy = fir::unwrapRefType(baseAddr.getType()); if (auto seqTy = mlir::dyn_cast(reductionTy)) reductionTy = seqTy.getEleTy(); diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -2,6 +2,37 @@ ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s +! CHECK-LABEL: acc.reduction.recipe @"reduction_add_ref_?xi32" : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>, %[[EXT:.*]]: index): +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array, %arg1 {bindc_name = ".reduction_init"} +! CHECK: %[[SHAPE:.*]] = fir.shape %[[EXT]] : (index) -> !fir.shape<1> +! CHECK: %[[ARR_LOAD:.*]] = fir.array_load %[[ALLOCA]](%[[SHAPE]]) : (!fir.ref>, !fir.shape<1>) -> !fir.array +! CHECK: %[[C0:.*]] = arith.constant 0 : index +! CHECK: %[[C1:.*]] = arith.constant 1 : index +! CHECK: %[[UB:.*]] = arith.subi %[[EXT]], %[[C1]] : index +! 
CHECK: %[[DO_RES:.*]] = fir.do_loop %[[IV:.*]] = %[[C0]] to %[[UB]] step %[[C1]] iter_args(%[[ARGS:.*]] = %[[ARR_LOAD]]) -> (!fir.array) { +! CHECK: %[[VALUE:.*]] = arith.constant 0 : i32 +! CHECK: %[[ARR_UP:.*]] = fir.array_update %[[ARGS]], %[[VALUE]], %[[IV]] : (!fir.array, i32, index) -> !fir.array +! CHECK: fir.result %[[ARR_UP]] : !fir.array +! CHECK: } +! CHECK: fir.array_merge_store %[[ARR_LOAD]], %[[DO_RES]] to %[[ALLOCA]] : !fir.array, !fir.array, !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>, %[[EXT:.*]]: index): +! CHECK: %[[C0:.*]] = arith.constant 0 : index +! CHECK: %[[C1:.*]] = arith.constant 1 : index +! CHECK: %[[UB:.*]] = arith.subi %[[EXT]], %[[C1]] : index +! CHECK: fir.do_loop %[[IV:.*]] = %[[C0]] to %[[UB]] step %[[C1]] { +! CHECK: %[[COORD0:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[LOAD0:.*]] = fir.load %[[COORD0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[COORD0]] : !fir.ref +! CHECK: } +! CHECK: acc.yield %[[ARG0]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_10xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<10xi32> @@ -839,3 +870,10 @@ ! CHECK: %[[BOUND:.*]] = acc.bounds lowerbound(%[[LB]] : index) upperbound(%[[UB]] : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index) ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[ARG0]] : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a(11:20)"} ! 
CHECK: acc.parallel reduction(@reduction_add_ref_10xi32 -> %[[RED]] : !fir.ref>) + +subroutine acc_reduction_add_dynamic_slice(a, n, m) + integer :: a(100) + integer :: n, m + !$acc parallel reduction(+:a(n:m)) + !$acc end parallel +end subroutine