diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -87,7 +87,10 @@ const Fortran::lower::SomeExpr *lowerExpr; const Fortran::lower::SomeExpr *upperExpr; const Fortran::lower::SomeExpr *stepExpr; + const Fortran::lower::SomeExpr *maskExpr = nullptr; bool isUnordered; // do concurrent, forall + llvm::SmallVector localInitSymList; + llvm::SmallVector sharedSymList; mlir::Value loopVariable = nullptr; mlir::Value stepValue = nullptr; // possible uses in multiple blocks @@ -98,6 +101,7 @@ bool hasRealControl = false; mlir::Value tripVariable = nullptr; mlir::Block *headerBlock = nullptr; // loop entry and test block + mlir::Block *maskBlock = nullptr; // concurrent loop mask block mlir::Block *bodyBlock = nullptr; // first loop body block mlir::Block *exitBlock = nullptr; // loop exit target block }; @@ -636,9 +640,24 @@ } /// Generate the address of loop variable \p sym. + /// If \p sym is not mapped yet, allocate local storage for it. mlir::Value genLoopVariableAddress(mlir::Location loc, - const Fortran::semantics::Symbol &sym) { - assert(lookupSymbol(sym) && "loop control variable must already be in map"); + const Fortran::semantics::Symbol &sym, + bool isUnordered) { + if (isUnordered || sym.has() || + sym.has()) { + if (!shallowLookupSymbol(sym)) { + // Do concurrent loop variables are not mapped yet since they are local + // to the Do concurrent scope (same for OpenMP loops). + auto newVal = builder->createTemporary(loc, genType(sym), + toStringRef(sym.name())); + bindIfNewSymbol(sym, newVal); + return newVal; + } + } + auto entry = lookupSymbol(sym); + (void)entry; + assert(entry && "loop control variable must already be in map"); Fortran::lower::StatementContext stmtCtx; return fir::getBase( genExprAddr(Fortran::evaluate::AsGenericExpr(sym).value(), stmtCtx)); @@ -973,6 +992,34 @@ builder->create(loc, selectExpr, indexList, blockList); } + /// Collect DO CONCURRENT or FORALL loop control information. + IncrementLoopNestInfo getConcurrentControl( + const Fortran::parser::ConcurrentHeader &header, + const std::list &localityList = {}) { + IncrementLoopNestInfo incrementLoopNestInfo; + for (const Fortran::parser::ConcurrentControl &control : + std::get>(header.t)) + incrementLoopNestInfo.emplace_back( + *std::get<0>(control.t).symbol, std::get<1>(control.t), + std::get<2>(control.t), std::get<3>(control.t), /*isUnordered=*/true); + IncrementLoopInfo &info = incrementLoopNestInfo.back(); + info.maskExpr = Fortran::semantics::GetExpr( + std::get>(header.t)); + for (const Fortran::parser::LocalitySpec &x : localityList) { + if (const auto *localInitList = + std::get_if(&x.u)) + for (const Fortran::parser::Name &x : localInitList->v) + info.localInitSymList.push_back(x.symbol); + if (const auto *sharedList = + std::get_if(&x.u)) + for (const Fortran::parser::Name &x : sharedList->v) + info.sharedSymList.push_back(x.symbol); + if (std::get_if(&x.u)) + TODO(toLocation(), "do concurrent locality specs not implemented"); + } + return incrementLoopNestInfo; + } + /// Generate FIR for a DO construct. There are six variants: /// - unstructured infinite and while loops /// - structured and unstructured increment loops @@ -1029,7 +1076,34 @@ info.exitBlock = exitBlock; } } else { - TODO(toLocation(), "infinite/unstructured loop/concurrent loop"); + const auto *concurrent = + std::get_if( + &loopControl->u); + assert(concurrent && "invalid DO loop variant"); + incrementLoopNestInfo = getConcurrentControl( + std::get(concurrent->t), + std::get>(concurrent->t)); + if (unstructuredContext) { + maybeStartBlock(preheaderBlock); + for (IncrementLoopInfo &info : incrementLoopNestInfo) { + // The original loop body provides the body and latch blocks of the + // innermost dimension. The (first) body block of a non-innermost + // dimension is the preheader block of the immediately enclosed + // dimension. The latch block of a non-innermost dimension is the + // exit block of the immediately enclosed dimension. + auto createNextExitBlock = [&]() { + // Create unstructured loop exit blocks, outermost to innermost. + return exitBlock = insertBlock(exitBlock); + }; + bool isInnermost = &info == &incrementLoopNestInfo.back(); + bool isOutermost = &info == &incrementLoopNestInfo.front(); + info.headerBlock = isOutermost ? headerBlock : createNextBeginBlock(); + info.bodyBlock = isInnermost ? bodyBlock : createNextBeginBlock(); + info.exitBlock = isOutermost ? exitBlock : createNextExitBlock(); + if (info.maskExpr) + info.maskBlock = createNextBeginBlock(); + } + } } // Increment loop begin code. (Infinite/while code was already generated.) @@ -1065,8 +1139,29 @@ return builder->createRealConstant(loc, controlType, 1u); return builder->createIntegerConstant(loc, controlType, 1); // step }; + auto handleLocalitySpec = [&](IncrementLoopInfo &info) { + // Generate Local Init Assignments + for (const Fortran::semantics::Symbol *sym : info.localInitSymList) { + const auto *hostDetails = + sym->detailsIf(); + assert(hostDetails && "missing local_init variable host variable"); + const Fortran::semantics::Symbol &hostSym = hostDetails->symbol(); + (void)hostSym; + TODO(loc, "do concurrent locality specs not implemented"); + // assign sym = hostSym + } + // Handle shared locality spec + for (const Fortran::semantics::Symbol *sym : info.sharedSymList) { + const auto *hostDetails = + sym->detailsIf(); + assert(hostDetails && "missing shared variable host variable"); + const Fortran::semantics::Symbol &hostSym = hostDetails->symbol(); + copySymbolBinding(hostSym, *sym); + } + }; for (IncrementLoopInfo &info : incrementLoopNestInfo) { - info.loopVariable = genLoopVariableAddress(loc, info.loopVariableSym); + info.loopVariable = + genLoopVariableAddress(loc, info.loopVariableSym, info.isUnordered); mlir::Value lowerValue = genControlValue(info.lowerExpr, info); mlir::Value upperValue = genControlValue(info.upperExpr, info); info.stepValue = genControlValue(info.stepExpr, info); @@ -1081,8 +1176,17 @@ mlir::Value value = builder->createConvert( loc, info.getLoopVariableType(), info.doLoop.getInductionVar()); builder->create(loc, value, info.loopVariable); - // TODO: Mask expr - // TODO: handle Locality Spec + if (info.maskExpr) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalize(); + mlir::Value maskCondCast = + builder->createConvert(loc, builder->getI1Type(), maskCond); + auto ifOp = builder->create(loc, maskCondCast, + /*withElseRegion=*/false); + builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + handleLocalitySpec(info); continue; } @@ -1119,16 +1223,33 @@ builder->create(loc, lowerValue, info.loopVariable); // Unstructured loop header - generate loop condition and mask. + // Note - Currently there is no way to tag a loop as a concurrent loop. startBlock(info.headerBlock); tripCount = builder->create(loc, info.tripVariable); mlir::Value zero = builder->createIntegerConstant(loc, tripCount.getType(), 0); auto cond = builder->create( loc, mlir::arith::CmpIPredicate::sgt, tripCount, zero); - // TODO: mask expression - genFIRConditionalBranch(cond, info.bodyBlock, info.exitBlock); - if (&info != &incrementLoopNestInfo.back()) // not innermost - startBlock(info.bodyBlock); // preheader block of enclosed dimension + if (info.maskExpr) { + genFIRConditionalBranch(cond, info.maskBlock, info.exitBlock); + startBlock(info.maskBlock); + mlir::Block *latchBlock = getEval().getLastNestedEvaluation().block; + assert(latchBlock && "missing masked concurrent loop latch block"); + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalize(); + genFIRConditionalBranch(maskCond, info.bodyBlock, latchBlock); + } else { + genFIRConditionalBranch(cond, info.bodyBlock, info.exitBlock); + if (&info != &incrementLoopNestInfo.back()) // not innermost + startBlock(info.bodyBlock); // preheader block of enclosed dimension + } + if (!info.localInitSymList.empty()) { + mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); + builder->setInsertionPointToStart(info.bodyBlock); + handleLocalitySpec(info); + builder->restoreInsertionPoint(insertPt); + } } } diff --git a/flang/test/Lower/OpenMP/omp-unstructured.f90 b/flang/test/Lower/OpenMP/omp-unstructured.f90 --- a/flang/test/Lower/OpenMP/omp-unstructured.f90 +++ b/flang/test/Lower/OpenMP/omp-unstructured.f90 @@ -59,8 +59,8 @@ end ! CHECK-LABEL: func @_QPss3{{.*}} { -! CHECK: %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", {{.*}}} ! CHECK: omp.parallel { +! CHECK: %[[ALLOCA_K:.*]] = fir.alloca i32 {bindc_name = "k", pinned} ! CHECK: %[[ALLOCA_1:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: %[[ALLOCA_2:.*]] = fir.alloca i32 {{{.*}}, pinned} ! CHECK: br ^bb1 diff --git a/flang/test/Lower/loops.f90 b/flang/test/Lower/loops.f90 new file mode 100644 --- /dev/null +++ b/flang/test/Lower/loops.f90 @@ -0,0 +1,109 @@ +! RUN: bbc -emit-fir -o - %s | FileCheck %s + +! CHECK-LABEL: loop_test +subroutine loop_test + ! CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i"} + ! CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i"} + ! CHECK: %[[VAL_4:.*]] = fir.alloca i16 {bindc_name = "i"} + ! CHECK: %[[VAL_5:.*]] = fir.alloca i8 {bindc_name = "k"} + ! CHECK: %[[VAL_6:.*]] = fir.alloca i8 {bindc_name = "j"} + ! CHECK: %[[VAL_7:.*]] = fir.alloca i8 {bindc_name = "i"} + ! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "k"} + ! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "j"} + ! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i"} + ! CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<5x5x5xi32> {bindc_name = "a", uniq_name = "_QFloop_testEa"} + ! CHECK: %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "asum", uniq_name = "_QFloop_testEasum"} + ! CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFloop_testEi"} + ! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {bindc_name = "j", uniq_name = "_QFloop_testEj"} + ! CHECK: %[[VAL_15:.*]] = fir.alloca i32 {bindc_name = "k", uniq_name = "_QFloop_testEk"} + ! CHECK: %[[VAL_16:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QFloop_testEx"} + ! CHECK: %[[VAL_17:.*]] = fir.alloca i32 {bindc_name = "xsum", uniq_name = "_QFloop_testExsum"} + + integer(4) :: a(5,5,5), i, j, k, asum, xsum + + i = 100 + j = 200 + k = 300 + + ! CHECK-COUNT-3: fir.do_loop {{.*}} unordered + do concurrent (i=1:5, j=1:5, k=1:5) ! shared(a) + ! CHECK: fir.coordinate_of + a(i,j,k) = 0 + enddo + ! CHECK: fir.call @_FortranAioBeginExternalListOutput + print*, 'A:', i, j, k + + ! CHECK-COUNT-3: fir.do_loop {{.*}} unordered + ! CHECK: fir.if + do concurrent (integer(1)::i=1:5, j=1:5, k=1:5, i.ne.j .and. k.ne.3) shared(a) + ! CHECK-COUNT-2: fir.coordinate_of + a(i,j,k) = a(i,j,k) + 1 + enddo + + ! CHECK-COUNT-3: fir.do_loop {{[^un]*}} -> index + asum = 0 + do i=1,5 + do j=1,5 + do k=1,5 + ! CHECK: fir.coordinate_of + asum = asum + a(i,j,k) + enddo + enddo + enddo + ! CHECK: fir.call @_FortranAioBeginExternalListOutput + print*, 'B:', i, j, k, '-', asum + + ! CHECK: fir.do_loop {{.*}} unordered + ! CHECK-COUNT-2: fir.if + do concurrent (integer(2)::i=1:5, i.ne.3) + if (i.eq.2 .or. i.eq.4) goto 5 ! fir.if + ! CHECK: fir.call @_FortranAioBeginExternalListOutput + print*, 'C:', i + 5 continue + enddo + + ! CHECK: fir.do_loop {{.*}} unordered + ! CHECK-COUNT-2: fir.if + do concurrent (integer(2)::i=1:5, i.ne.3) + if (i.eq.2 .or. i.eq.4) then ! fir.if + goto 6 + endif + ! CHECK: fir.call @_FortranAioBeginExternalListOutput + print*, 'D:', i + 6 continue + enddo + + ! CHECK-NOT: fir.do_loop + ! CHECK-NOT: fir.if + do concurrent (integer(2)::i=1:5, i.ne.3) + goto (7, 7) i+1 + ! CHECK: fir.call @_FortranAioBeginExternalListOutput + print*, 'E:', i + 7 continue + enddo + + xsum = 0.0 + ! CHECK-NOT: fir.do_loop + do x = 1.5, 3.5, 0.3 + xsum = xsum + 1 + enddo + ! CHECK: fir.call @_FortranAioBeginExternalFormattedOutput + print '(" F:",X,F3.1,A,I2)', x, ' -', xsum +end subroutine loop_test + +! CHECK-LABEL: print_nothing +subroutine print_nothing(k1, k2) + if (k1 > 0) then + ! CHECK: br [[header:\^bb[0-9]+]] + ! CHECK: [[header]] + do while (k1 > k2) + print*, k1, k2 ! no output + k2 = k2 + 1 + ! CHECK: br [[header]] + end do + end if +end + + call loop_test + call print_nothing(2, 2) +end