diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -108,7 +108,7 @@ llvm::ArrayRef argTy; Op op = builder.create(loc, argTy, operands); builder.createBlock(&op.getRegion()); - auto &block = op.getRegion().back(); + mlir::Block &block = op.getRegion().back(); builder.setInsertionPointToStart(&block); builder.create(loc); @@ -204,161 +204,166 @@ } } -static void genACC(Fortran::lower::AbstractConverter &converter, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenACCLoopConstruct &loopConstruct) { +static mlir::acc::LoopOp +createLoopOp(Fortran::lower::AbstractConverter &converter, + const Fortran::parser::AccClauseList &accClauseList) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::Location currentLocation = converter.getCurrentLocation(); Fortran::lower::StatementContext stmtCtx; - const auto &beginLoopDirective = - std::get(loopConstruct.t); - const auto &loopDirective = - std::get(beginLoopDirective.t); - if (loopDirective.v == llvm::acc::ACCD_loop) { - auto &firOpBuilder = converter.getFirOpBuilder(); - auto currentLocation = converter.getCurrentLocation(); - - // Add attribute extracted from clauses. - const auto &accClauseList = - std::get(beginLoopDirective.t); + mlir::Value workerNum; + mlir::Value vectorNum; + mlir::Value gangNum; + mlir::Value gangStatic; + llvm::SmallVector tileOperands, privateOperands, + reductionOperands; + std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE; - mlir::Value workerNum; - mlir::Value vectorLength; - mlir::Value gangNum; - mlir::Value gangStatic; - llvm::SmallVector tileOperands, privateOperands, - reductionOperands; - std::int64_t executionMapping = mlir::acc::OpenACCExecMapping::NONE; - - // Lower clauses values mapped to operands. 
- for (const auto &clause : accClauseList.v) { - if (const auto *gangClause = - std::get_if(&clause.u)) { - if (gangClause->v) { - const Fortran::parser::AccGangArgument &x = *gangClause->v; - if (const auto &gangNumValue = - std::get>( - x.t)) { - gangNum = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(gangNumValue.value()), stmtCtx)); - } - if (const auto &gangStaticValue = - std::get>(x.t)) { - const auto &expr = - std::get>( - gangStaticValue.value().t); - if (expr) { - gangStatic = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*expr), stmtCtx)); - } else { - // * was passed as value and will be represented as a -1 constant - // integer. - gangStatic = firOpBuilder.createIntegerConstant( - currentLocation, firOpBuilder.getIntegerType(32), - /* STAR */ -1); - } - } - } - executionMapping |= mlir::acc::OpenACCExecMapping::GANG; - } else if (const auto *workerClause = - std::get_if( - &clause.u)) { - if (workerClause->v) { - workerNum = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx)); - } - executionMapping |= mlir::acc::OpenACCExecMapping::WORKER; - } else if (const auto *vectorClause = - std::get_if( - &clause.u)) { - if (vectorClause->v) { - vectorLength = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx)); + for (const Fortran::parser::AccClause &clause : accClauseList.v) { + if (const auto *gangClause = + std::get_if(&clause.u)) { + if (gangClause->v) { + const Fortran::parser::AccGangArgument &x = *gangClause->v; + if (const auto &gangNumValue = + std::get>(x.t)) { + gangNum = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(gangNumValue.value()), stmtCtx)); } - executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR; - } else if (const auto *tileClause = - std::get_if(&clause.u)) { - const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v; - for (const auto &accTileExpr : 
accTileExprList.v) { + if (const auto &gangStaticValue = + std::get>(x.t)) { const auto &expr = - std::get>( - accTileExpr.t); + std::get>( + gangStaticValue.value().t); if (expr) { - tileOperands.push_back(fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*expr), stmtCtx))); + gangStatic = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(*expr), stmtCtx)); } else { - // * was passed as value and will be represented as a -1 constant - // integer. - mlir::Value tileStar = firOpBuilder.createIntegerConstant( - currentLocation, firOpBuilder.getIntegerType(32), - /* STAR */ -1); - tileOperands.push_back(tileStar); + // * was passed as value and will be represented as a special + // constant. + gangStatic = firOpBuilder.createIntegerConstant( + currentLocation, firOpBuilder.getIndexType(), starCst); } } - } else if (const auto *privateClause = - std::get_if( - &clause.u)) { - genObjectList(privateClause->v, converter, privateOperands); } - // Reduction clause is left out for the moment as the clause will probably - // end up having its own operation. 
+ executionMapping |= mlir::acc::OpenACCExecMapping::GANG; + } else if (const auto *workerClause = + std::get_if(&clause.u)) { + if (workerClause->v) { + workerNum = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(*workerClause->v), stmtCtx)); + } + executionMapping |= mlir::acc::OpenACCExecMapping::WORKER; + } else if (const auto *vectorClause = + std::get_if(&clause.u)) { + if (vectorClause->v) { + vectorNum = fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(*vectorClause->v), stmtCtx)); + } + executionMapping |= mlir::acc::OpenACCExecMapping::VECTOR; + } else if (const auto *tileClause = + std::get_if(&clause.u)) { + const Fortran::parser::AccTileExprList &accTileExprList = tileClause->v; + for (const auto &accTileExpr : accTileExprList.v) { + const auto &expr = + std::get>( + accTileExpr.t); + if (expr) { + tileOperands.push_back(fir::getBase(converter.genExprValue( + *Fortran::semantics::GetExpr(*expr), stmtCtx))); + } else { + // * was passed as value and will be represented as a -1 constant + // integer. + mlir::Value tileStar = firOpBuilder.createIntegerConstant( + currentLocation, firOpBuilder.getIntegerType(32), + /* STAR */ -1); + tileOperands.push_back(tileStar); + } + } + } else if (const auto *privateClause = + std::get_if( + &clause.u)) { + genObjectList(privateClause->v, converter, privateOperands); } + // Reduction clause is left out for the moment as the clause will probably + // end up having its own operation. + } - // Prepare the operand segement size attribute and the operands value range. 
- llvm::SmallVector operands; - llvm::SmallVector operandSegments; - addOperand(operands, operandSegments, gangNum); - addOperand(operands, operandSegments, gangStatic); - addOperand(operands, operandSegments, workerNum); - addOperand(operands, operandSegments, vectorLength); - addOperands(operands, operandSegments, tileOperands); - addOperands(operands, operandSegments, privateOperands); - addOperands(operands, operandSegments, reductionOperands); - - auto loopOp = createRegionOp( - firOpBuilder, currentLocation, operands, operandSegments); - - loopOp->setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(), - firOpBuilder.getI64IntegerAttr(executionMapping)); - - // Lower clauses mapped to attributes - for (const auto &clause : accClauseList.v) { - if (const auto *collapseClause = - std::get_if(&clause.u)) { - const auto *expr = Fortran::semantics::GetExpr(collapseClause->v); - const auto collapseValue = Fortran::evaluate::ToInt64(*expr); - if (collapseValue) { - loopOp->setAttr(mlir::acc::LoopOp::getCollapseAttrName(), - firOpBuilder.getI64IntegerAttr(*collapseValue)); - } - } else if (std::get_if(&clause.u)) { - loopOp->setAttr(mlir::acc::LoopOp::getSeqAttrName(), - firOpBuilder.getUnitAttr()); - } else if (std::get_if( - &clause.u)) { - loopOp->setAttr(mlir::acc::LoopOp::getIndependentAttrName(), - firOpBuilder.getUnitAttr()); - } else if (std::get_if(&clause.u)) { - loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrName(), - firOpBuilder.getUnitAttr()); + // Prepare the operand segment size attribute and the operands value range. 
+ llvm::SmallVector operands; + llvm::SmallVector operandSegments; + addOperand(operands, operandSegments, gangNum); + addOperand(operands, operandSegments, gangStatic); + addOperand(operands, operandSegments, workerNum); + addOperand(operands, operandSegments, vectorNum); + addOperands(operands, operandSegments, tileOperands); + addOperands(operands, operandSegments, privateOperands); + addOperands(operands, operandSegments, reductionOperands); + + auto loopOp = createRegionOp( + firOpBuilder, currentLocation, operands, operandSegments); + + loopOp->setAttr(mlir::acc::LoopOp::getExecutionMappingAttrName(), + firOpBuilder.getI64IntegerAttr(executionMapping)); + + // Lower clauses mapped to attributes + for (const Fortran::parser::AccClause &clause : accClauseList.v) { + if (const auto *collapseClause = + std::get_if(&clause.u)) { + const auto *expr = Fortran::semantics::GetExpr(collapseClause->v); + const std::optional collapseValue = + Fortran::evaluate::ToInt64(*expr); + if (collapseValue) { + loopOp->setAttr(mlir::acc::LoopOp::getCollapseAttrName(), + firOpBuilder.getI64IntegerAttr(*collapseValue)); } + } else if (std::get_if(&clause.u)) { + loopOp->setAttr(mlir::acc::LoopOp::getSeqAttrName(), + firOpBuilder.getUnitAttr()); + } else if (std::get_if( + &clause.u)) { + loopOp->setAttr(mlir::acc::LoopOp::getIndependentAttrName(), + firOpBuilder.getUnitAttr()); + } else if (std::get_if(&clause.u)) { + loopOp->setAttr(mlir::acc::LoopOp::getAutoAttrName(), + firOpBuilder.getUnitAttr()); } } + return loopOp; } -static void -genACCParallelOp(Fortran::lower::AbstractConverter &converter, +static void genACC(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenACCLoopConstruct &loopConstruct) { + + const auto &beginLoopDirective = + std::get(loopConstruct.t); + const auto &loopDirective = + std::get(beginLoopDirective.t); + + if (loopDirective.v == llvm::acc::ACCD_loop) { + const auto &accClauseList = + 
std::get(beginLoopDirective.t); + createLoopOp(converter, accClauseList); + } +} + +static mlir::acc::ParallelOp +createParallelOp(Fortran::lower::AbstractConverter &converter, const Fortran::parser::AccClauseList &accClauseList) { + + // Parallel operation operands mlir::Value async; mlir::Value numGangs; mlir::Value numWorkers; mlir::Value vectorLength; mlir::Value ifCond; mlir::Value selfCond; + mlir::Value waitDevnum; llvm::SmallVector waitOperands, reductionOperands, copyOperands, copyinOperands, copyinReadonlyOperands, copyoutOperands, copyoutZeroOperands, createOperands, createZeroOperands, noCreateOperands, - presentOperands, devicePtrOperands, attachOperands, privateOperands, - firstprivateOperands; + presentOperands, devicePtrOperands, attachOperands, firstprivateOperands, + privateOperands; // Async, wait and self clause have optional values but can be present with // no value as well. When there is no value, the op has an attribute to @@ -367,38 +372,21 @@ bool addWaitAttr = false; bool addSelfAttr = false; - auto &firOpBuilder = converter.getFirOpBuilder(); - auto currentLocation = converter.getCurrentLocation(); + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::Location currentLocation = converter.getCurrentLocation(); Fortran::lower::StatementContext stmtCtx; // Lower clauses values mapped to operands. // Keep track of each group of operands separatly as clauses can appear // more than once. - for (const auto &clause : accClauseList.v) { + for (const Fortran::parser::AccClause &clause : accClauseList.v) { if (const auto *asyncClause = std::get_if(&clause.u)) { - const auto &asyncClauseValue = asyncClause->v; - if (asyncClauseValue) { // async has a value. 
- async = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(*asyncClauseValue), stmtCtx)); - } else { - addAsyncAttr = true; - } + genAsyncClause(converter, asyncClause, async, addAsyncAttr, stmtCtx); } else if (const auto *waitClause = std::get_if(&clause.u)) { - const auto &waitClauseValue = waitClause->v; - if (waitClauseValue) { // wait has a value. - const Fortran::parser::AccWaitArgument &waitArg = *waitClauseValue; - const auto &waitList = - std::get>(waitArg.t); - for (const Fortran::parser::ScalarIntExpr &value : waitList) { - auto v = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(value), stmtCtx)); - waitOperands.push_back(v); - } - } else { - addWaitAttr = true; - } + genWaitClause(converter, waitClause, waitOperands, waitDevnum, + addWaitAttr, stmtCtx); } else if (const auto *numGangsClause = std::get_if( &clause.u)) { @@ -416,10 +404,7 @@ *Fortran::semantics::GetExpr(vectorLengthClause->v), stmtCtx)); } else if (const auto *ifClause = std::get_if(&clause.u)) { - mlir::Value cond = fir::getBase(converter.genExprValue( - *Fortran::semantics::GetExpr(ifClause->v), stmtCtx)); - ifCond = firOpBuilder.createConvert(currentLocation, - firOpBuilder.getI1Type(), cond); + genIfClause(converter, ifClause, ifCond, stmtCtx); } else if (const auto *selfClause = std::get_if(&clause.u)) { const Fortran::parser::AccSelfClause &accSelfClause = selfClause->v; @@ -434,6 +419,21 @@ } else { addSelfAttr = true; } + } else if (const auto *accClauseList = + std::get_if( + &accSelfClause.u)) { + // TODO: This would be nicer if done in a canonicalization step. 
+ if (accClauseList->v.size() == 1) { + const auto &accObject = accClauseList->v.front(); + if (const auto *designator = + std::get_if(&accObject.u)) { + if (const auto *name = getDesignatorNameIfDataRef(*designator)) { + auto cond = converter.getSymbolAddress(*name->symbol); + selfCond = firOpBuilder.createConvert( + currentLocation, firOpBuilder.getI1Type(), cond); + } + } + } } } else if (const auto *copyClause = std::get_if(&clause.u)) { @@ -508,8 +508,9 @@ addOperands(operands, operandSegments, privateOperands); addOperands(operands, operandSegments, firstprivateOperands); - auto parallelOp = createRegionOp( - firOpBuilder, currentLocation, operands, operandSegments); + mlir::acc::ParallelOp parallelOp = + createRegionOp( + firOpBuilder, currentLocation, operands, operandSegments); if (addAsyncAttr) parallelOp->setAttr(mlir::acc::ParallelOp::getAsyncAttrName(), @@ -520,6 +521,14 @@ if (addSelfAttr) parallelOp->setAttr(mlir::acc::ParallelOp::getSelfAttrName(), firOpBuilder.getUnitAttr()); + + return parallelOp; +} + +static void +genACCParallelOp(Fortran::lower::AbstractConverter &converter, + const Fortran::parser::AccClauseList &accClauseList) { + createParallelOp(converter, accClauseList); } static void genACCDataOp(Fortran::lower::AbstractConverter &converter, @@ -619,6 +628,37 @@ } } +static void +genACCParallelLoopOps(Fortran::lower::AbstractConverter &converter, + const Fortran::parser::AccClauseList &accClauseList) { + createParallelOp(converter, accClauseList); + createLoopOp(converter, accClauseList); +} + +static void +genACC(Fortran::lower::AbstractConverter &converter, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenACCCombinedConstruct &combinedConstruct) { + const auto &beginCombinedDirective = + std::get(combinedConstruct.t); + const auto &combinedDirective = + std::get(beginCombinedDirective.t); + const auto &accClauseList = + std::get(beginCombinedDirective.t); + + if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) 
{ + TODO(converter.getCurrentLocation(), + "OpenACC Kernels Loop construct not lowered yet!"); + } else if (combinedDirective.v == llvm::acc::ACCD_parallel_loop) { + genACCParallelLoopOps(converter, accClauseList); + } else if (combinedDirective.v == llvm::acc::ACCD_serial_loop) { + TODO(converter.getCurrentLocation(), + "OpenACC Serial Loop construct not lowered yet!"); + } else { + llvm::report_fatal_error("Unknown combined construct encountered"); + } +} + static void genACCEnterDataOp(Fortran::lower::AbstractConverter &converter, const Fortran::parser::AccClauseList &accClauseList) { @@ -979,8 +1019,7 @@ }, [&](const Fortran::parser::OpenACCCombinedConstruct &combinedConstruct) { - TODO(converter.getCurrentLocation(), - "OpenACC Combined construct not lowered yet!"); + genACC(converter, eval, combinedConstruct); }, [&](const Fortran::parser::OpenACCLoopConstruct &loopConstruct) { genACC(converter, eval, loopConstruct); diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-loop.f90 @@ -0,0 +1,268 @@ +! This test checks lowering of OpenACC loop directive. + +! 
RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s + +program acc_loop + + integer :: i, j + integer, parameter :: n = 10 + real, dimension(n) :: a, b + real, dimension(n, n) :: c, d + integer :: gangNum = 8 + integer :: gangStatic = 8 + integer :: vectorLength = 128 + integer, parameter :: tileSize = 2 + + + !$acc loop + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop seq + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {seq} + + !$acc loop auto + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {auto} + + !$acc loop independent + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {independent} + + !$acc loop gang + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop gang { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop gang(num: 8) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32 +!CHECK-NEXT: acc.loop gang(num=[[GANGNUM1]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop gang(num: gangNum) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK-NEXT: acc.loop gang(num=[[GANGNUM2]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop gang(num: gangNum, static: gangStatic) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop gang(num=%{{.*}}: i32, static=%{{.*}}: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop vector + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop vector { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop vector(128) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[CONSTANT128:%.*]] = 
arith.constant 128 : i32 +!CHECK: acc.loop vector([[CONSTANT128]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop vector(vectorLength) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.loop vector([[VECTORLENGTH]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + +!$acc loop worker + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop worker { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop worker(128) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32 +!CHECK: acc.loop worker([[WORKER128]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop private(c) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop private(%{{.*}}: !fir.ref>) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop private(c, d) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop private(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop private(c) private(d) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop private(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop tile(2) + DO i = 1, n + a(i) = b(i) + END DO +!CHECK: [[TILESIZE:%.*]] = arith.constant 2 : i32 +!CHECK: acc.loop tile([[TILESIZE]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop tile(*) + DO i = 1, n + a(i) = b(i) + END DO +!CHECK: [[TILESIZEM1:%.*]] = arith.constant -1 : i32 +!CHECK: acc.loop tile([[TILESIZEM1]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop tile(2, 2) + DO i = 1, n + DO j = 1, n + c(i, j) = d(i, j) + END DO + END DO + +!CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32 +!CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32 +!CHECK: acc.loop 
tile([[TILESIZE1]]: i32, [[TILESIZE2]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop tile(tileSize) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.loop tile(%{{.*}}: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop tile(tileSize, tileSize) + DO i = 1, n + DO j = 1, n + c(i, j) = d(i, j) + END DO + END DO + +!CHECK: acc.loop tile(%{{.*}}: i32, %{{.*}}: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc loop collapse(2) + DO i = 1, n + DO j = 1, n + c(i, j) = d(i, j) + END DO + END DO + +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {collapse = 2 : i64} + + !$acc loop + DO i = 1, n + !$acc loop + DO j = 1, n + c(i, j) = d(i, j) + END DO + END DO + +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + +end program diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 @@ -0,0 +1,697 @@ +! This test checks lowering of OpenACC parallel loop combined directive. + +! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s + +subroutine acc_parallel_loop + integer :: i, j + + integer :: async = 1 + integer :: wait1 = 1 + integer :: wait2 = 2 + integer :: numGangs = 1 + integer :: numWorkers = 10 + integer :: vectorLength = 128 + logical :: ifCondition = .TRUE. 
+ integer, parameter :: n = 10 + real, dimension(n) :: a, b, c + real, dimension(n, n) :: d, e + real, pointer :: f, g + + integer :: gangNum = 8 + integer :: gangStatic = 8 + integer :: vectorNum = 128 + integer, parameter :: tileSize = 2 + +!CHECK: [[A:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ea"} +!CHECK: [[B:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Eb"} +!CHECK: [[C:%.*]] = fir.alloca !fir.array<10xf32> {{{.*}}uniq_name = "{{.*}}Ec"} +!CHECK: [[F:%.*]] = fir.alloca !fir.box> {bindc_name = "f", uniq_name = "{{.*}}Ef"} +!CHECK: [[G:%.*]] = fir.alloca !fir.box> {bindc_name = "g", uniq_name = "{{.*}}Eg"} +!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref> + + !$acc parallel loop + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop async + DO i = 1, n + a(i) = b(i) + END DO + !$acc end parallel loop + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: } attributes {asyncAttr} + + !$acc parallel loop async(1) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[ASYNC1:%.*]] = arith.constant 1 : i32 +!CHECK: acc.parallel async([[ASYNC1]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop async(async) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.parallel async([[ASYNC2]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop wait + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: } 
attributes {waitAttr} + + !$acc parallel loop wait(1) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[WAIT1:%.*]] = arith.constant 1 : i32 +!CHECK: acc.parallel wait([[WAIT1]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop wait(1, 2) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[WAIT2:%.*]] = arith.constant 1 : i32 +!CHECK: [[WAIT3:%.*]] = arith.constant 2 : i32 +!CHECK: acc.parallel wait([[WAIT2]]: i32, [[WAIT3]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop wait(wait1, wait2) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.parallel wait([[WAIT4]]: i32, [[WAIT5]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop num_gangs(1) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[NUMGANGS1:%.*]] = arith.constant 1 : i32 +!CHECK: acc.parallel num_gangs([[NUMGANGS1]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop num_gangs(numGangs) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.parallel num_gangs([[NUMGANGS2]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop num_workers(10) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[NUMWORKERS1:%.*]] = arith.constant 10 : i32 +!CHECK: acc.parallel num_workers([[NUMWORKERS1]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop 
num_workers(numWorkers) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.parallel num_workers([[NUMWORKERS2]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop vector_length(128) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32 +!CHECK: acc.parallel vector_length([[VECTORLENGTH1]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop vector_length(vectorLength) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.parallel vector_length([[VECTORLENGTH2]]: i32) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop if(.TRUE.) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[IF1:%.*]] = arith.constant true +!CHECK: acc.parallel if([[IF1]]) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop if(ifCondition) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref> +!CHECK: [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1 +!CHECK: acc.parallel if([[IF2]]) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop self(.TRUE.) 
+ DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[SELF1:%.*]] = arith.constant true +!CHECK: acc.parallel self([[SELF1]]) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop self + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: } attributes {selfAttr} + + !$acc parallel loop self(ifCondition) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref>) -> i1 +!CHECK: acc.parallel self([[SELF2]]) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop copy(a, b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel copy([[A]]: !fir.ref>, [[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop copy(a) copy(b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel copy([[A]]: !fir.ref>, [[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop copyin(a) copyin(readonly: b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel copyin([[A]]: !fir.ref>) copyin_readonly([[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop copyout(a) copyout(zero: b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel copyout([[A]]: !fir.ref>) copyout_zero([[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop create(b) create(zero: a) + DO i = 1, n + a(i) = b(i) + END DO + 
+!CHECK: acc.parallel create([[B]]: !fir.ref>) create_zero([[A]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop no_create(a, b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel no_create([[A]]: !fir.ref>, [[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop present(a, b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel present([[A]]: !fir.ref>, [[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop deviceptr(a) deviceptr(b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel deviceptr([[A]]: !fir.ref>, [[B]]: !fir.ref>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop attach(f, g) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel attach([[F]]: !fir.ref>>, [[G]]: !fir.ref>>) { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop private(a) firstprivate(b) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel private([[A]]: !fir.ref>) firstprivate([[B]]: !fir.ref>) { +!CHECK: acc.loop private([[A]]: !fir.ref>) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop seq + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {seq} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop auto + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } 
attributes {auto} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop independent + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {independent} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop gang + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop gang { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop gang(num: 8) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[GANGNUM1:%.*]] = arith.constant 8 : i32 +!CHECK-NEXT: acc.loop gang(num=[[GANGNUM1]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop gang(num: gangNum) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[GANGNUM2:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK-NEXT: acc.loop gang(num=[[GANGNUM2]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop gang(num: gangNum, static: gangStatic) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop gang(num=%{{.*}}: i32, static=%{{.*}}: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop vector + DO i = 1, n + a(i) = b(i) + END DO +!CHECK: acc.parallel { +!CHECK: acc.loop vector { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop vector(128) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[CONSTANT128:%.*]] = arith.constant 128 : i32 +!CHECK: acc.loop vector([[CONSTANT128]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop vector(vectorLength) + 
DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[VECTORLENGTH:%.*]] = fir.load %{{.*}} : !fir.ref +!CHECK: acc.loop vector([[VECTORLENGTH]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop worker + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop worker { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop worker(128) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[WORKER128:%.*]] = arith.constant 128 : i32 +!CHECK: acc.loop worker([[WORKER128]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop collapse(2) + DO i = 1, n + DO j = 1, n + d(i, j) = e(i, j) + END DO + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: } attributes {collapse = 2 : i64} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop + DO i = 1, n + !$acc loop + DO j = 1, n + d(i, j) = e(i, j) + END DO + END DO + +!CHECK: acc.parallel { +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.loop { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop tile(2) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[TILESIZE:%.*]] = arith.constant 2 : i32 +!CHECK: acc.loop tile([[TILESIZE]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} + + !$acc parallel loop tile(*) + DO i = 1, n + a(i) = b(i) + END DO + +!CHECK: acc.parallel { +!CHECK: [[TILESIZEM1:%.*]] = arith.constant -1 : i32 +!CHECK: acc.loop tile([[TILESIZEM1]]: i32) { +!CHECK: fir.do_loop +!CHECK: acc.yield +!CHECK-NEXT: }{{$}} +!CHECK: acc.yield 
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop tile(2, 2)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK: acc.parallel {
+!CHECK: [[TILESIZE1:%.*]] = arith.constant 2 : i32
+!CHECK: [[TILESIZE2:%.*]] = arith.constant 2 : i32
+!CHECK: acc.loop tile([[TILESIZE1]]: i32, [[TILESIZE2]]: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop tile(tileSize)
+  DO i = 1, n
+    a(i) = b(i)
+  END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop tile(%{{.*}}: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel loop tile(tileSize, tileSize)
+  DO i = 1, n
+    DO j = 1, n
+      d(i, j) = e(i, j)
+    END DO
+  END DO
+
+!CHECK: acc.parallel {
+!CHECK: acc.loop tile(%{{.*}}: i32, %{{.*}}: i32) {
+!CHECK: fir.do_loop
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_parallel_loop
diff --git a/flang/test/Lower/OpenACC/acc-parallel.f90 b/flang/test/Lower/OpenACC/acc-parallel.f90
new file mode 100644
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-parallel.f90
@@ -0,0 +1,249 @@
+! This test checks lowering of OpenACC parallel directive.
+
+! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s
+
+subroutine acc_parallel
+  integer :: i, j
+
+  integer :: async = 1
+  integer :: wait1 = 1
+  integer :: wait2 = 2
+  integer :: numGangs = 1
+  integer :: numWorkers = 10
+  integer :: vectorLength = 128
+  logical :: ifCondition = .TRUE.
+  real, dimension(10, 10) :: a, b, c
+  real, pointer :: d, e
+
+! Capture the values for a, b, c, d, e and ifCondition so the clause patterns below can refer to them.
+!CHECK: [[A:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ea"}
+!CHECK: [[B:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Eb"}
+!CHECK: [[C:%.*]] = fir.alloca !fir.array<10x10xf32> {{{.*}}uniq_name = "{{.*}}Ec"}
+!CHECK: [[D:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "d", uniq_name = "{{.*}}Ed"}
+!CHECK: [[E:%.*]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = "e", uniq_name = "{{.*}}Ee"}
+!CHECK: [[IFCONDITION:%.*]] = fir.address_of(@{{.*}}ifcondition) : !fir.ref<!fir.logical<4>>
+
+  !$acc parallel
+  !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel async
+  !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {asyncAttr}
+
+  !$acc parallel async(1)
+  !$acc end parallel
+
+!CHECK: [[ASYNC1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel async([[ASYNC1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel async(async)
+  !$acc end parallel
+
+!CHECK: [[ASYNC2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel async([[ASYNC2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel wait
+  !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {waitAttr}
+
+  !$acc parallel wait(1)
+  !$acc end parallel
+
+!CHECK: [[WAIT1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel wait([[WAIT1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel wait(1, 2)
+  !$acc end parallel
+
+!CHECK: [[WAIT2:%.*]] = arith.constant 1 : i32
+!CHECK: [[WAIT3:%.*]] = arith.constant 2 : i32
+!CHECK: acc.parallel wait([[WAIT2]]: i32, [[WAIT3]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel wait(wait1, wait2)
+  !$acc end parallel
+
+!CHECK: [[WAIT4:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: [[WAIT5:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel wait([[WAIT4]]: i32, [[WAIT5]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_gangs(1)
+  !$acc end parallel
+
+!CHECK: [[NUMGANGS1:%.*]] = arith.constant 1 : i32
+!CHECK: acc.parallel num_gangs([[NUMGANGS1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_gangs(numGangs)
+  !$acc end parallel
+
+!CHECK: [[NUMGANGS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel num_gangs([[NUMGANGS2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_workers(10)
+  !$acc end parallel
+
+!CHECK: [[NUMWORKERS1:%.*]] = arith.constant 10 : i32
+!CHECK: acc.parallel num_workers([[NUMWORKERS1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel num_workers(numWorkers)
+  !$acc end parallel
+
+!CHECK: [[NUMWORKERS2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel num_workers([[NUMWORKERS2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel vector_length(128)
+  !$acc end parallel
+
+!CHECK: [[VECTORLENGTH1:%.*]] = arith.constant 128 : i32
+!CHECK: acc.parallel vector_length([[VECTORLENGTH1]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel vector_length(vectorLength)
+  !$acc end parallel
+
+!CHECK: [[VECTORLENGTH2:%.*]] = fir.load %{{.*}} : !fir.ref<i32>
+!CHECK: acc.parallel vector_length([[VECTORLENGTH2]]: i32) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel if(.TRUE.)
+  !$acc end parallel
+
+!CHECK: [[IF1:%.*]] = arith.constant true
+!CHECK: acc.parallel if([[IF1]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel if(ifCondition)
+  !$acc end parallel
+
+!CHECK: [[IFCOND:%.*]] = fir.load %{{.*}} : !fir.ref<!fir.logical<4>>
+!CHECK: [[IF2:%.*]] = fir.convert [[IFCOND]] : (!fir.logical<4>) -> i1
+!CHECK: acc.parallel if([[IF2]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel self(.TRUE.)
+  !$acc end parallel
+
+!CHECK: [[SELF1:%.*]] = arith.constant true
+!CHECK: acc.parallel self([[SELF1]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel self
+  !$acc end parallel
+
+!CHECK: acc.parallel {
+!CHECK: acc.yield
+!CHECK-NEXT: } attributes {selfAttr}
+
+  !$acc parallel self(ifCondition)
+  !$acc end parallel
+
+! NOTE(review): unlike the if(ifCondition) case above, no fir.load is expected here;
+! the reference to ifCondition itself is converted to i1. Confirm this is intended.
+!CHECK: [[SELF2:%.*]] = fir.convert [[IFCONDITION]] : (!fir.ref<!fir.logical<4>>) -> i1
+!CHECK: acc.parallel self([[SELF2]]) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copy(a, b, c)
+  !$acc end parallel
+
+!CHECK: acc.parallel copy([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copy(a) copy(b) copy(c)
+  !$acc end parallel
+
+!CHECK: acc.parallel copy([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copyin(a) copyin(readonly: b, c)
+  !$acc end parallel
+
+!CHECK: acc.parallel copyin([[A]]: !fir.ref<!fir.array<10x10xf32>>) copyin_readonly([[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel copyout(a) copyout(zero: b) copyout(c)
+  !$acc end parallel
+
+!CHECK: acc.parallel copyout([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) copyout_zero([[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel create(a, b) create(zero: c)
+  !$acc end parallel
+
+!CHECK: acc.parallel create([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>) create_zero([[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel no_create(a, b) create(zero: c)
+  !$acc end parallel
+
+!CHECK: acc.parallel create_zero([[C]]: !fir.ref<!fir.array<10x10xf32>>) no_create([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel present(a, b, c)
+  !$acc end parallel
+
+!CHECK: acc.parallel present([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[B]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel deviceptr(a) deviceptr(c)
+  !$acc end parallel
+
+!CHECK: acc.parallel deviceptr([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel attach(d, e)
+  !$acc end parallel
+
+!CHECK: acc.parallel attach([[D]]: !fir.ref<!fir.box<!fir.ptr<f32>>>, [[E]]: !fir.ref<!fir.box<!fir.ptr<f32>>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+  !$acc parallel private(a) firstprivate(b) private(c)
+  !$acc end parallel
+
+!CHECK: acc.parallel private([[A]]: !fir.ref<!fir.array<10x10xf32>>, [[C]]: !fir.ref<!fir.array<10x10xf32>>) firstprivate([[B]]: !fir.ref<!fir.array<10x10xf32>>) {
+!CHECK: acc.yield
+!CHECK-NEXT: }{{$}}
+
+end subroutine acc_parallel