Index: flang/include/flang/Lower/OpenMP.h =================================================================== --- flang/include/flang/Lower/OpenMP.h +++ flang/include/flang/Lower/OpenMP.h @@ -34,6 +34,9 @@ struct OmpClauseList; } // namespace parser +namespace semantics { +class SemanticsContext; +} namespace lower { class AbstractConverter; @@ -48,7 +51,8 @@ mlir::Location); void genOpenMPConstruct(AbstractConverter &, pft::Evaluation &, - const parser::OpenMPConstruct &); + const parser::OpenMPConstruct &, + Fortran::semantics::SemanticsContext &); void genOpenMPDeclarativeConstruct(AbstractConverter &, pft::Evaluation &, const parser::OpenMPDeclarativeConstruct &); int64_t getCollapseValue(const Fortran::parser::OmpClauseList &clauseList); Index: flang/lib/Lower/Bridge.cpp =================================================================== --- flang/lib/Lower/Bridge.cpp +++ flang/lib/Lower/Bridge.cpp @@ -2238,7 +2238,7 @@ void genFIR(const Fortran::parser::OpenMPConstruct &omp) { mlir::OpBuilder::InsertPoint insertPt = builder->saveInsertionPoint(); localSymbols.pushScope(); - genOpenMPConstruct(*this, getEval(), omp); + genOpenMPConstruct(*this, getEval(), omp, bridge.getSemanticsContext()); const Fortran::parser::OpenMPLoopConstruct *ompLoop = std::get_if(&omp.u); Index: flang/lib/Lower/OpenMP.cpp =================================================================== --- flang/lib/Lower/OpenMP.cpp +++ flang/lib/Lower/OpenMP.cpp @@ -24,6 +24,7 @@ #include "flang/Semantics/tools.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" using namespace mlir; @@ -1446,6 +1447,98 @@ return mlir::omp::ScheduleModifier::none; } +static llvm::StringMap getTargetFeatures(mlir::ModuleOp module) { + llvm::StringMap featuresMap; + llvm::SmallVector targetFeaturesVec; + llvm::StringRef targetFeaturesStr; + auto targetInterface = + llvm::dyn_cast(module.getOperation()); + if 
(!targetInterface) { + return featuresMap; + } + auto targetAttr = targetInterface.getTarget(); + if (!targetAttr) + return featuresMap; + targetFeaturesStr = targetAttr.getTargetFeatures(); + targetFeaturesStr.split(targetFeaturesVec, ","); + for (auto &feature : targetFeaturesVec) { + if (feature.empty()) + continue; + llvm::StringRef featureKeyString = feature.substr(1); + featuresMap[featureKeyString] = (feature[0] == '+'); + } + + return featuresMap; +} + +static void +genAlignClause(Fortran::lower::AbstractConverter &converter, + const Fortran::parser::OmpAlignedClause &ompAlignClause, + SmallVector &alignVars, + SmallVector &alignmentValues, mlir::Location loc, + Fortran::semantics::SemanticsContext &semanticsContext) { + + mlir::IntegerAttr alignmentValueAttr; + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + const Fortran::parser::OmpObjectList &ompObjectList = + std::get(ompAlignClause.t); + + const auto &alignmentValueParserExpr = + std::get>( + ompAlignClause.t); + if (alignmentValueParserExpr) { + // Use alignment value specified in the input Fortran code + const auto &alignmentValueExpr = + Fortran::semantics::GetExpr(alignmentValueParserExpr); + const std::optional alignment = + Fortran::evaluate::ToInt64(*alignmentValueExpr); + + alignmentValueAttr = builder.getI64IntegerAttr(*alignment); + } else { + // Get the target features string stored in the MLIR module and build a map + // whose key is the feature name and whose bool value tells whether the + // feature is enabled (i.e. it was prefixed with '+'). + llvm::StringMap featuresMap = getTargetFeatures(builder.getModule()); + + const llvm::Triple &triple = fir::getTargetTriple(builder.getModule()); + // Calculate the default alignment for given target + int64_t defaultOpenMPSimdAlignment = + llvm::OpenMPIRBuilder::getOpenMPDefaultSimdAlign(triple, featuresMap); + alignmentValueAttr = builder.getI64IntegerAttr(defaultOpenMPSimdAlignment); + } + + for (const Fortran::parser::OmpObject 
&ompItem : ompObjectList.v) { + const Fortran::parser::Designator *sym = nullptr; + std::visit(Fortran::common::visitors{ + [&](const Fortran::parser::Designator &designator) { + sym = &designator; + }, + [&](const Fortran::parser::Name &name) { + // Common block is represented in OmpObject as Name. + // According to OpenMP 5.2 spec (5.11) common block + // is not allowed as an aligned item. Flang parser checks + // if given aligned item is common block. That's why we + // skip common blocks in MLIR lowering + }}, + ompItem.u); + + if (!sym) + continue; + + // The default alignment for some targets is equal to 0. + // Do not generate alignment assumption if alignment is equal to 0. + if (alignmentValueAttr.getInt() <= 0) + continue; + + mlir::Value alignedItem = fir::getBase(converter.genExprAddr( + *(Fortran::semantics::AnalyzeExpr(semanticsContext, *sym)), stmtCtx, + &loc)); + alignVars.push_back(alignedItem); + alignmentValues.push_back(alignmentValueAttr); + } +} + static mlir::omp::ScheduleModifier getSIMDModifier(const Fortran::parser::OmpScheduleClause &x) { const auto &modifier = @@ -1597,7 +1690,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { + const Fortran::parser::OpenMPLoopConstruct &loopConstruct, + Fortran::semantics::SemanticsContext &semanticsContext) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector lowerBound, upperBound, step, linearVars, @@ -1606,7 +1700,7 @@ mlir::Attribute scheduleClauseOperand, noWaitClauseOperand, orderedClauseOperand, orderClauseOperand; mlir::IntegerAttr simdlenClauseOperand, safelenClauseOperand; - SmallVector reductionDeclSymbols; + SmallVector reductionDeclSymbols, alignmentValues; Fortran::lower::StatementContext stmtCtx; const auto &loopOpClauseList = std::get( std::get(loopConstruct.t).t); @@ -1687,6 +1781,11 @@ std::get_if(&clause.u)) { ifClauseOperand = 
getIfClauseOperand(converter, stmtCtx, ifClause, clauseLocation); + } else if (const auto &alignedClause = + std::get_if( + &clause.u)) { + genAlignClause(converter, alignedClause->v, alignedVars, alignmentValues, + clauseLocation, semanticsContext); } else if (const auto &reductionClause = std::get_if( &clause.u)) { @@ -1727,7 +1826,10 @@ TypeRange resultType; auto simdLoopOp = firOpBuilder.create( currentLocation, resultType, lowerBound, upperBound, step, alignedVars, - nullptr, ifClauseOperand, nontemporalVars, + alignmentValues.empty() + ? nullptr + : mlir::ArrayAttr::get(firOpBuilder.getContext(), alignmentValues), + ifClauseOperand, nontemporalVars, orderClauseOperand.dyn_cast_or_null(), simdlenClauseOperand, safelenClauseOperand, /*inclusive=*/firOpBuilder.getUnitAttr()); @@ -2599,7 +2701,8 @@ void Fortran::lower::genOpenMPConstruct( Fortran::lower::AbstractConverter &converter, Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPConstruct &ompConstruct) { + const Fortran::parser::OpenMPConstruct &ompConstruct, + Fortran::semantics::SemanticsContext &semanticsContext) { std::visit( common::visitors{ @@ -2615,7 +2718,7 @@ genOMP(converter, eval, sectionConstruct); }, [&](const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { - genOMP(converter, eval, loopConstruct); + genOMP(converter, eval, loopConstruct, semanticsContext); }, [&](const Fortran::parser::OpenMPDeclarativeAllocate &execAllocConstruct) { Index: flang/test/Lower/OpenMP/simd.f90 =================================================================== --- flang/test/Lower/OpenMP/simd.f90 +++ flang/test/Lower/OpenMP/simd.f90 @@ -164,3 +164,37 @@ end do !$OMP END SIMD end subroutine + +!CHECK: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!CHECK-SAME: > {fir.bindc_name = "a"}) { +subroutine simdloop_aligned_cptr( A) + use iso_c_binding + integer :: i + type (c_ptr) :: A +!CHECK: omp.simdloop aligned(%[[ARG_A]] : !fir.ref +!CHECK-SAME: > +!CHECK-SAME: -> 256 : i64) + 
!$OMP SIMD ALIGNED(A:256) + do i = 1, 10 + call c_test_call(A) + end do + !$OMP END SIMD +end subroutine + +!CHECK-LABEL: func @_QPsimdloop_aligned_allocatable +subroutine simdloop_aligned_allocatable() + integer :: i + integer, allocatable :: A(:) + allocate(A(10)) +!CHECK: %[[A_PTR:.*]] = fir.alloca !fir.box>> {bindc_name = "a", +!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"} +!CHECK: %[[A_PTR_LOAD:.*]] = fir.load %[[A_PTR]] : !fir.ref>>> +!CHECK: %[[A_ALIGNED:.*]] = fir.box_addr %[[A_PTR_LOAD]] : (!fir.box>>) +!CHECK-SAME: -> !fir.heap> +!CHECK: omp.simdloop aligned(%[[A_ALIGNED]] : !fir.heap> -> 256 : i64) + !$OMP SIMD ALIGNED(A:256) + do i = 1, 10 + A(i) = i + end do +end subroutine Index: flang/test/Lower/OpenMP/simd_aarch64.f90 =================================================================== --- /dev/null +++ flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -0,0 +1,20 @@ +! Tests for 2.9.3.1 Simd and target dependent default alignment for AArch64 +! The default alignment for AArch64 is 0 so we do not emit aligned clause + +! REQUIRES: aarch64-registered-target +! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-fir -fopenmp %s -o - | FileCheck %s + +subroutine simdloop_aligned_cptr(A) + use iso_c_binding + integer :: i + type (c_ptr) :: A +!CHECK: omp.simdloop +!CHECK-NOT: aligned( +!CHECK-SAME: for (%[[IT:.*]]) : i32 = (%[[LB:.*]]) to (%[[UB:.*]]) inclusive step (%[[INC:.*]]) { + !$OMP SIMD ALIGNED(A) + do i = 1, 10 + call c_test_call(A) + end do + !$OMP END SIMD +end subroutine + Index: flang/test/Lower/OpenMP/simd_x86_64.f90 =================================================================== --- /dev/null +++ flang/test/Lower/OpenMP/simd_x86_64.f90 @@ -0,0 +1,36 @@ +! Tests for 2.9.3.1 Simd and target dependent default alignment for x86 + +! REQUIRES: x86-registered-target +! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-fir -fopenmp -target-cpu x86-64 %s -o - | FileCheck --check-prefixes=DEFAULT %s +! 
RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-fir -fopenmp -target-cpu x86-64 -target-feature +avx %s -o - | FileCheck --check-prefixes=AVX %s +! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-fir -fopenmp -target-cpu x86-64 -target-feature +avx512f %s -o - | FileCheck --check-prefixes=AVX512F %s + +!DEFAULT: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!DEFAULT-SAME: > {fir.bindc_name = "a"}) { +!AVX: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!AVX-SAME: > {fir.bindc_name = "a"}) { +!AVX512F: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!AVX512F-SAME: > {fir.bindc_name = "a"}) { +subroutine simdloop_aligned_cptr(A) + use iso_c_binding + integer :: i + type (c_ptr) :: A +!DEFAULT: omp.simdloop aligned(%[[ARG_A]] : !fir.ref +!DEFAULT-SAME: > +!DEFAULT-SAME: -> 128 : i64) +!AVX: omp.simdloop aligned(%[[ARG_A]] : !fir.ref +!AVX-SAME: > +!AVX-SAME: -> 256 : i64) +!AVX512F: omp.simdloop aligned(%[[ARG_A]] : !fir.ref +!AVX512F-SAME: > +!AVX512F-SAME: -> 512 : i64) + !$OMP SIMD ALIGNED(A) + do i = 1, 10 + call c_test_call(A) + end do + !$OMP END SIMD +end subroutine + Index: mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td =================================================================== --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -540,6 +540,9 @@ /// Returns the number of loops in the simd loop nest. unsigned getNumLoops() { return getLowerBound().size(); } + /// Returns the number of aligned variables in the simd loop. 
+ unsigned getNumAlignedVars() { return getAlignedVars().size(); } + }]; let hasCustomAssemblyFormat = 1; Index: mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp =================================================================== --- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -1029,6 +1029,15 @@ safelen = builder.getInt64(safelenVar.value()); llvm::MapVector alignedVars; + for (unsigned i = 0; i < loop.getNumAlignedVars(); ++i) { + llvm::Value *alignedVar = + moduleTranslation.lookupValue(loop.getAlignedVars()[i]); + IntegerAttr alignmentAttr = + (loop.getAlignmentValues().value()[i].cast()); + alignedVars[alignedVar] = + builder.getInt64(alignmentAttr.getValue().getZExtValue()); + } + ompBuilder->applySimd( loopInfo, alignedVars, loop.getIfExpr() ? moduleTranslation.lookupValue(loop.getIfExpr()) Index: mlir/test/Target/LLVMIR/openmp-llvm.mlir =================================================================== --- mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -756,6 +756,45 @@ // CHECK-NEXT: llvm.loop.vectorize.enable // CHECK: llvm.loop.vectorize.enable +// ----- + +// CHECK: define void @simdloop_aligned_cptr_(ptr %[[ALIGNED:.*]]) { +llvm.func @simdloop_aligned_cptr_(%arg0: !llvm.ptr> {fir.bindc_name = "a"}) { + %0 = llvm.mlir.constant(10 : i32) : i32 + %1 = llvm.mlir.constant(1 : i32) : i32 + %2 = llvm.mlir.constant(1 : i64) : i64 + %3 = llvm.alloca %2 x i32 {adapt.valuebyref, in_type = i32, operand_segment_sizes = array} : (i64) -> !llvm.ptr + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFsimdloop_aligned_cptrEi"} : (i64) -> !llvm.ptr +// CHECK: call void @llvm.assume(i1 true) [ "align"(ptr %[[ALIGNED]], i64 256) ] + omp.simdloop aligned(%arg0 : !llvm.ptr> -> 256 : i64) for (%arg1) : i32 = (%1) to (%0) 
inclusive step (%1) { + omp.yield + } + llvm.return +} + +// ----- + +// CHECK-LABEL: @simdloop_aligned_allocatable_ +llvm.func @simdloop_aligned_allocatable_() { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>> + %2 = llvm.mlir.constant(1 : i32) : i32 + %3 = llvm.mlir.constant(10 : i32) : i32 + %4 = llvm.mlir.constant(1 : i32) : i32 + %5 = llvm.getelementptr %1[0, 0] : (!llvm.ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>>) -> !llvm.ptr> + %6 = llvm.load %5 : !llvm.ptr> +// CHECK: %[[PTR_ALLOC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 +// CHECK: %[[TMP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[PTR_ALLOC]], i32 0, i32 0 +// CHECK: %[[ALIGNED:.*]] = load ptr, ptr %[[TMP]], align 8 +// CHECK: call void @llvm.assume(i1 true) [ "align"(ptr %[[ALIGNED]], i64 128) ] + omp.simdloop aligned(%6 : !llvm.ptr -> 128 : i64) for (%arg0) : i32 = (%4) to (%3) inclusive step (%4) { + omp.yield + } + llvm.return +} + + // ----- llvm.func @body(i64)