diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp
--- a/flang/lib/Lower/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP.cpp
@@ -783,28 +783,50 @@
   }
 }
 
+/// This function returns the identity value of the operator \p reductionOpName.
+/// For example:
+///    0 + x = x,
+///    1 * x = x
+static int getOperationIdentity(llvm::StringRef reductionOpName,
+                                mlir::Location loc) {
+  if (reductionOpName.contains("add"))
+    return 0;
+  else if (reductionOpName.contains("multiply"))
+    return 1;
+  TODO(loc, "Reduction of some intrinsic operators is not supported");
+}
+
+static Value getReductionInitValue(mlir::Location loc, mlir::Type type,
+                                   llvm::StringRef reductionOpName,
+                                   fir::FirOpBuilder &builder) {
+  return builder.create<mlir::arith::ConstantOp>(
+      loc, type,
+      builder.getIntegerAttr(type, getOperationIdentity(reductionOpName, loc)));
+}
+
 /// Creates an OpenMP reduction declaration and inserts it into the provided
 /// symbol table. The declaration has a constant initializer with the neutral
 /// value `initValue`, and the reduction combiner carried over from `reduce`.
 /// TODO: Generalize this for non-integer types, add atomic region.
-static omp::ReductionDeclareOp createReductionDecl(fir::FirOpBuilder &builder,
-                                                   llvm::StringRef name,
-                                                   mlir::Type type,
-                                                   mlir::Location loc) {
+static omp::ReductionDeclareOp createReductionDecl(
+    fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
+    Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
+    mlir::Type type, mlir::Location loc) {
   OpBuilder::InsertionGuard guard(builder);
   mlir::ModuleOp module = builder.getModule();
   mlir::OpBuilder modBuilder(module.getBodyRegion());
-  auto decl = module.lookupSymbol<mlir::omp::ReductionDeclareOp>(name);
+  auto decl =
+      module.lookupSymbol<mlir::omp::ReductionDeclareOp>(reductionOpName);
   if (!decl)
-    decl = modBuilder.create<omp::ReductionDeclareOp>(loc, name, type);
+    decl =
+        modBuilder.create<omp::ReductionDeclareOp>(loc, reductionOpName, type);
   else
     return decl;
 
   builder.createBlock(&decl.initializerRegion(), decl.initializerRegion().end(),
                       {type}, {loc});
   builder.setInsertionPointToEnd(&decl.initializerRegion().back());
-  Value init = builder.create<mlir::arith::ConstantOp>(
-      loc, type, builder.getIntegerAttr(type, 0));
+  Value init = getReductionInitValue(loc, type, reductionOpName, builder);
   builder.create<omp::YieldOp>(loc, init);
 
   builder.createBlock(&decl.reductionRegion(), decl.reductionRegion().end(),
@@ -812,8 +834,20 @@
   builder.setInsertionPointToEnd(&decl.reductionRegion().back());
   mlir::Value op1 = decl.reductionRegion().front().getArgument(0);
   mlir::Value op2 = decl.reductionRegion().front().getArgument(1);
-  Value addRes = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
-  builder.create<omp::YieldOp>(loc, addRes);
+
+  Value res;
+  switch (intrinsicOp) {
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+    res = builder.create<mlir::arith::AddIOp>(loc, op1, op2);
+    break;
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+    res = builder.create<mlir::arith::MulIOp>(loc, op1, op2);
+    break;
+  default:
+    TODO(loc, "Reduction of some intrinsic operators is not supported");
+  }
+
+  builder.create<omp::YieldOp>(loc, res);
   return decl;
 }
 
@@ -885,10 +919,18 @@
     Fortran::parser::DefinedOperator::IntrinsicOperator intrinsicOp,
     mlir::Type ty) {
   std::string reductionName;
-  if (intrinsicOp == Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
+
+  switch (intrinsicOp) {
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
     reductionName = "add_reduction";
-  else
+    break;
+  case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+    reductionName = "multiply_reduction";
+    break;
+  default:
     reductionName = "other_reduction";
+    break;
+  }
 
   return (llvm::Twine(reductionName) +
           (ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
@@ -990,10 +1032,16 @@
         const auto &intrinsicOp{
             std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
                 redDefinedOp->u)};
-        if (intrinsicOp !=
-            Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
+        switch (intrinsicOp) {
+        case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+        case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+          break;
+
+        default:
           TODO(currentLocation,
                "Reduction of some intrinsic operators is not supported");
+          break;
+        }
         for (const auto &ompObject : objectList.v) {
           if (const auto *name{
                   Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
@@ -1005,7 +1053,7 @@
               if (redType.isIntOrIndex()) {
                 decl = createReductionDecl(
                     firOpBuilder, getReductionName(intrinsicOp, redType),
-                    redType, currentLocation);
+                    intrinsicOp, redType, currentLocation);
               } else {
                 TODO(currentLocation,
                      "Reduction of some types is not supported");
@@ -1604,8 +1652,8 @@
 // Generate an OpenMP reduction operation. This implementation finds the chain :
 // load reduction var -> reduction_operation -> store reduction var and replaces
 // it with the reduction operation.
-// TODO: Currently assumes it is an integer addition reduction. Generalize this
-// for various reduction operation types.
+// TODO: Currently assumes it is an integer addition/multiplication reduction.
+// Generalize this for various reduction operation types.
 // TODO: Generate the reduction operation during lowering instead of creating
 // and removing operations since this is not a robust approach. Also, removing
 // ops in the builder (instead of a rewriter) is probably not the best approach.
@@ -1626,9 +1674,14 @@
       const auto &intrinsicOp{
           std::get<Fortran::parser::DefinedOperator::IntrinsicOperator>(
               reductionOp->u)};
-      if (intrinsicOp !=
-          Fortran::parser::DefinedOperator::IntrinsicOperator::Add)
+
+      switch (intrinsicOp) {
+      case Fortran::parser::DefinedOperator::IntrinsicOperator::Add:
+      case Fortran::parser::DefinedOperator::IntrinsicOperator::Multiply:
+        break;
+      default:
         continue;
+      }
       for (const auto &ompObject : objectList.v) {
         if (const auto *name{
                 Fortran::parser::Unwrap<Fortran::parser::Name>(ompObject)}) {
diff --git a/flang/test/Lower/OpenMP/Todo/reduction-multiply.f90 b/flang/test/Lower/OpenMP/Todo/reduction-multiply.f90
deleted file mode 100644
--- a/flang/test/Lower/OpenMP/Todo/reduction-multiply.f90
+++ /dev/null
@@ -1,15 +0,0 @@
-! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s
-
-! CHECK: not yet implemented: Reduction of some intrinsic operators is not supported
-subroutine reduction_multiply
-  integer :: x
-  !$omp parallel
-  !$omp do reduction(*:x)
-  do i=1, 100
-    x = x * i
-  end do
-  !$omp end do
-  !$omp end parallel
-  print *, x
-end subroutine
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-int.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90
rename from flang/test/Lower/OpenMP/wsloop-reduction-int.f90
rename to flang/test/Lower/OpenMP/wsloop-reduction-int-add.f90
diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-int.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
rename from flang/test/Lower/OpenMP/wsloop-reduction-int.f90
rename to flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
--- a/flang/test/Lower/OpenMP/wsloop-reduction-int.f90
+++ b/flang/test/Lower/OpenMP/wsloop-reduction-int-mul.f90
@@ -4,35 +4,35 @@
 !CHECK-LABEL: omp.reduction.declare
 !CHECK-SAME: @[[RED_I64_NAME:.*]] : i64 init {
 !CHECK: ^bb0(%{{.*}}: i64):
-!CHECK: %[[C0_1:.*]] = arith.constant 0 : i64
-!CHECK: omp.yield(%[[C0_1]] : i64)
+!CHECK: %[[C1_1:.*]] = arith.constant 1 : i64
+!CHECK: omp.yield(%[[C1_1]] : i64)
 !CHECK: } combiner {
 !CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
-!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i64
+!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i64
 !CHECK: omp.yield(%[[RES]] : i64)
 !CHECK: }
 
 !CHECK-LABEL: omp.reduction.declare
 !CHECK-SAME: @[[RED_I32_NAME:.*]] : i32 init {
 !CHECK: ^bb0(%{{.*}}: i32):
-!CHECK: %[[C0_1:.*]] = arith.constant 0 : i32
-!CHECK: omp.yield(%[[C0_1]] : i32)
+!CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
+!CHECK: omp.yield(%[[C1_1]] : i32)
 !CHECK: } combiner {
 !CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32):
-!CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32
+!CHECK: %[[RES:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32
 !CHECK: omp.yield(%[[RES]] : i32)
 !CHECK: }
 
 !CHECK-LABEL: func.func @_QPsimple_reduction
 !CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reductionEx"}
-!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32
-!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
 !CHECK: omp.parallel
 !CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK: %[[C100:.*]] = arith.constant 100 : i32
+!CHECK: %[[C10:.*]] = arith.constant 10 : i32
 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
+!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
 !CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
 !CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
 !CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
@@ -42,11 +42,11 @@
 
 subroutine simple_reduction
   integer :: x
-  x = 0
+  x = 1
   !$omp parallel
-  !$omp do reduction(+:x)
-  do i=1, 100
-    x = x + i
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = x * i
   end do
   !$omp end do
   !$omp end parallel
@@ -54,14 +54,14 @@
 
 !CHECK-LABEL: func.func @_QPsimple_reduction_switch_order
 !CHECK: %[[XREF:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFsimple_reduction_switch_orderEx"}
-!CHECK: %[[C0_2:.*]] = arith.constant 0 : i32
-!CHECK: fir.store %[[C0_2]] to %[[XREF]] : !fir.ref<i32>
+!CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
+!CHECK: fir.store %[[C1_2]] to %[[XREF]] : !fir.ref<i32>
 !CHECK: omp.parallel
 !CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK: %[[C1_1:.*]] = arith.constant 1 : i32
-!CHECK: %[[C100:.*]] = arith.constant 100 : i32
+!CHECK: %[[C10:.*]] = arith.constant 10 : i32
 !CHECK: %[[C1_2:.*]] = arith.constant 1 : i32
-!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C100]]) inclusive step (%[[C1_2]])
+!CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>) for (%[[IVAL:.*]]) : i32 = (%[[C1_1]]) to (%[[C10]]) inclusive step (%[[C1_2]])
 !CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
 !CHECK: %[[I_PVT_VAL:.*]] = fir.load %[[I_PVT_REF]] : !fir.ref<i32>
 !CHECK: omp.reduction %[[I_PVT_VAL]], %[[XREF]] : !fir.ref<i32>
@@ -71,11 +71,11 @@
 
 subroutine simple_reduction_switch_order
   integer :: x
-  x = 0
+  x = 1
   !$omp parallel
-  !$omp do reduction(+:x)
-  do i=1, 100
-    x = i + x
+  !$omp do reduction(*:x)
+  do i=1, 10
+    x = i * x
   end do
   !$omp end do
   !$omp end parallel
@@ -101,15 +101,15 @@
 
 subroutine multiple_reductions_same_type
   integer :: x,y,z
-  x = 0
-  y = 0
-  z = 0
+  x = 1
+  y = 1
+  z = 1
   !$omp parallel
-  !$omp do reduction(+:x,y,z)
-  do i=1, 100
-    x = x + i
-    y = y + i
-    z = z + i
+  !$omp do reduction(*:x,y,z)
+  do i=1, 10
+    x = x * i
+    y = y * i
+    z = z * i
   end do
   !$omp end do
   !$omp end parallel
@@ -122,10 +122,10 @@
 !CHECK: %[[I_PVT_REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned}
 !CHECK: omp.wsloop reduction(@[[RED_I32_NAME]] -> %[[XREF]] : !fir.ref<i32>, @[[RED_I64_NAME]] -> %[[YREF]] : !fir.ref<i64>) for (%[[IVAL:.*]]) : i32
 !CHECK: fir.store %[[IVAL]] to %[[I_PVT_REF]] : !fir.ref<i32>
-!CHECK: %[[C1_32:.*]] = arith.constant 1 : i32
-!CHECK: omp.reduction %[[C1_32]], %[[XREF]] : !fir.ref<i32>
-!CHECK: %[[C1_64:.*]] = arith.constant 1 : i64
-!CHECK: omp.reduction %[[C1_64]], %[[YREF]] : !fir.ref<i64>
+!CHECK: %[[C2_32:.*]] = arith.constant 2 : i32
+!CHECK: omp.reduction %[[C2_32]], %[[XREF]] : !fir.ref<i32>
+!CHECK: %[[C2_64:.*]] = arith.constant 2 : i64
+!CHECK: omp.reduction %[[C2_64]], %[[YREF]] : !fir.ref<i64>
 !CHECK: omp.yield
 !CHECK: omp.terminator
 !CHECK: return
@@ -134,10 +134,10 @@
   integer :: x
   integer(kind=8) :: y
   !$omp parallel
-  !$omp do reduction(+:x,y)
-  do i=1, 100
-    x = x + 1_4
-    y = y + 1_8
+  !$omp do reduction(*:x,y)
+  do i=1, 10
+    x = x * 2_4
+    y = y * 2_8
   end do
   !$omp end do
   !$omp end parallel