diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -906,6 +906,7 @@ mlir::Value scheduleChunkClauseOperand, ifClauseOperand; mlir::Attribute scheduleClauseOperand, noWaitClauseOperand, orderedClauseOperand, orderClauseOperand; + mlir::IntegerAttr simdlenClauseOperand; SmallVector reductionDeclSymbols; Fortran::lower::StatementContext stmtCtx; const auto &loopOpClauseList = std::get( @@ -1017,6 +1018,13 @@ TODO(currentLocation, "Reduction of intrinsic procedures is not supported"); } + } else if (const auto &simdlenClause = + std::get_if( + &clause.u)) { + const auto *expr = Fortran::semantics::GetExpr(simdlenClause->v); + const std::optional simdlenVal = + Fortran::evaluate::ToInt64(*expr); + simdlenClauseOperand = firOpBuilder.getI64IntegerAttr(*simdlenVal); } } @@ -1038,7 +1046,8 @@ TypeRange resultType; auto SimdLoopOp = firOpBuilder.create( currentLocation, resultType, lowerBound, upperBound, step, - ifClauseOperand, /*inclusive=*/firOpBuilder.getUnitAttr()); + ifClauseOperand, simdlenClauseOperand, + /*inclusive=*/firOpBuilder.getUnitAttr()); createBodyOfOp(SimdLoopOp, converter, currentLocation, eval, &loopOpClauseList, iv); return; diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -36,3 +36,56 @@ end do !$OMP END SIMD end subroutine + +!CHECK-LABEL: func @_QPsimdloop_with_simdlen_clause +subroutine simdloop_with_simdlen_clause(n, threshold) +integer :: i, n, threshold + !$OMP SIMD SIMDLEN(2) + ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[UB:.*]] = fir.load %arg0 + ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + do i = 1, n + ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref + ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref + ! 
CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) : (!fir.ref, i32) -> i1 + print*, i + end do + !$OMP END SIMD +end subroutine + +!CHECK-LABEL: func @_QPsimdloop_with_simdlen_clause_from_param +subroutine simdloop_with_simdlen_clause_from_param(n, threshold) +integer :: i, n, threshold +integer, parameter :: simdlen = 2; + !$OMP SIMD SIMDLEN(simdlen) + ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[UB:.*]] = fir.load %arg0 + ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.simdloop simdlen(2) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + do i = 1, n + ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref + ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref + ! CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) : (!fir.ref, i32) -> i1 + print*, i + end do + !$OMP END SIMD +end subroutine + +!CHECK-LABEL: func @_QPsimdloop_with_simdlen_clause_from_expr_from_param +subroutine simdloop_with_simdlen_clause_from_expr_from_param(n, threshold) +integer :: i, n, threshold +integer, parameter :: simdlen = 2; + !$OMP SIMD SIMDLEN(simdlen*2 + 2) + ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 + ! CHECK: %[[UB:.*]] = fir.load %arg0 + ! CHECK: %[[STEP:.*]] = arith.constant 1 : i32 + ! CHECK: omp.simdloop simdlen(6) for (%[[I:.*]]) : i32 = (%[[LB]]) to (%[[UB]]) inclusive step (%[[STEP]]) { + do i = 1, n + ! CHECK: fir.store %[[I]] to %[[LOCAL:.*]] : !fir.ref + ! CHECK: %[[LD:.*]] = fir.load %[[LOCAL]] : !fir.ref + ! 
CHECK: fir.call @_FortranAioOutputInteger32({{.*}}, %[[LD]]) : (!fir.ref, i32) -> i1 + print*, i + end do + !$OMP END SIMD +end subroutine diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -415,6 +415,9 @@ When an if clause is present and evaluates to false, the preferred number of iterations to be executed concurrently is one, regardless of whether a simdlen clause is speciﬁed. + + When a simdlen clause is present, the preferred number of iterations to be + executed concurrently is the value provided to the simdlen clause. ``` omp.simdloop for (%i1, %i2) : index = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { @@ -429,12 +432,14 @@ Variadic:$upperBound, Variadic:$step, Optional:$if_expr, + Confined, [IntPositive]>:$simdlen, UnitAttr:$inclusive ); let regions = (region AnyRegion:$region); let assemblyFormat = [{ oilist(`if` `(` $if_expr `)` + |`simdlen` `(` $simdlen `)` ) `for` custom($region, $lowerBound, $upperBound, $step, type($step), $inclusive) attr-dict }]; diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -971,7 +971,11 @@ llvm::CanonicalLoopInfo *loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {}); - ompBuilder->applySimd(loopInfo, nullptr); + llvm::ConstantInt *simdlen = nullptr; + if (llvm::Optional simdlenVar = loop.simdlen()) + simdlen = builder.getInt64(simdlenVar.getValue()); + + ompBuilder->applySimd(loopInfo, simdlen); builder.restoreIP(afterIP); return success(); diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ 
b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -205,6 +205,16 @@ // ----- +func.func @omp_simdloop_pretty_simdlen(%lb : index, %ub : index, %step : index) -> () { + // expected-error @below {{op attribute 'simdlen' failed to satisfy constraint: 64-bit signless integer attribute whose value is positive}} + omp.simdloop simdlen(0) for (%iv): index = (%lb) to (%ub) step (%step) { + omp.yield + } + return +} + +// ----- + // expected-error @below {{op expects initializer region with one argument of the reduction type}} omp.reduction.declare @add_f32 : f64 init { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -357,6 +357,15 @@ return } +// CHECK-LABEL: omp_simdloop_pretty_simdlen +func.func @omp_simdloop_pretty_simdlen(%lb : index, %ub : index, %step : index) -> () { + // CHECK: omp.simdloop simdlen(2) for (%{{.*}}) : index = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) + omp.simdloop simdlen(2) for (%iv): index = (%lb) to (%ub) step (%step) { + omp.yield + } + return +} + // CHECK-LABEL: omp_simdloop_pretty_multiple func.func @omp_simdloop_pretty_multiple(%lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> () { // CHECK: omp.simdloop for (%{{.*}}, %{{.*}}) : index = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -728,6 +728,28 @@ // ----- +// CHECK-LABEL: @simdloop_simple_multiple_simdlen +llvm.func @simdloop_simple_multiple_simdlen(%lb1 : i64, %ub1 : i64, %step1 : i64, %lb2 : i64, %ub2 : i64, %step2 : i64, %arg0: !llvm.ptr, %arg1: !llvm.ptr) { + omp.simdloop simdlen(2) for (%iv1, %iv2) : i64 = (%lb1, %lb2) to (%ub1, %ub2) step (%step1, %step2) { + %3 = llvm.mlir.constant(2.000000e+00 : f32) : f32 + // The form of 
the emitted IR is controlled by OpenMPIRBuilder and + // tested there. Just check that the right metadata is added. + // CHECK: llvm.access.group + // CHECK-NEXT: llvm.access.group + %4 = llvm.getelementptr %arg0[%iv1] : (!llvm.ptr, i64) -> !llvm.ptr + %5 = llvm.getelementptr %arg1[%iv2] : (!llvm.ptr, i64) -> !llvm.ptr + llvm.store %3, %4 : !llvm.ptr + llvm.store %3, %5 : !llvm.ptr + omp.yield + } + llvm.return +} +// CHECK: llvm.loop.parallel_accesses +// CHECK-NEXT: llvm.loop.vectorize.enable +// CHECK-NEXT: llvm.loop.vectorize.width{{.*}}i64 2 + +// ----- + llvm.func @body(i64) llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {