diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -535,6 +535,41 @@ llvm_unreachable("unexpected reduction operator"); } +/// Get the correct DenseElementsAttr attribute for the given init value. +/// The verifier on the DenseElementsAttr is strict about the init value passed +/// to it so it must match the type. +static mlir::DenseElementsAttr getDenseAttr(mlir::ShapedType shTy, + int64_t value) { + if (shTy.getElementType().isIntOrIndex()) { + if (auto intTy = mlir::dyn_cast(shTy.getElementType())) { + if (intTy.getIntOrFloatBitWidth() == 8) + return mlir::DenseElementsAttr::get(shTy, static_cast(value)); + if (intTy.getIntOrFloatBitWidth() == 16) + return mlir::DenseElementsAttr::get(shTy, static_cast(value)); + if (intTy.getIntOrFloatBitWidth() == 32) + return mlir::DenseElementsAttr::get(shTy, static_cast(value)); + if (intTy.getIntOrFloatBitWidth() == 64) + return mlir::DenseElementsAttr::get(shTy, value); + } + } + + if (mlir::isa(shTy.getElementType())) { + if (auto intTy = mlir::dyn_cast(shTy.getElementType())) { + if (intTy.getIntOrFloatBitWidth() == 16) + return mlir::DenseElementsAttr::get(shTy, static_cast(value)); + if (intTy.getIntOrFloatBitWidth() == 32) + return mlir::DenseElementsAttr::get(shTy, static_cast(value)); + if (intTy.getIntOrFloatBitWidth() == 64) + return mlir::DenseElementsAttr::get(shTy, static_cast(value)); + if (intTy.getIntOrFloatBitWidth() == 128) + return mlir::DenseElementsAttr::get(shTy, + static_cast(value)); + } + } + + llvm_unreachable("unsupported dense attribute type"); +} + static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type ty, mlir::acc::ReductionOperator op) { @@ -584,6 +619,15 @@ if (mlir::isa(ty)) return builder.create( loc, ty, builder.getFloatAttr(ty, initValue)); + if (auto refTy = mlir::dyn_cast(ty)) { + if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { + mlir::Type 
vecType = + mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy()); + mlir::DenseElementsAttr denseAttr = + getDenseAttr(vecType.cast(), initValue); + return builder.create(loc, vecType, denseAttr); + } + } } TODO(loc, "reduction type"); @@ -592,6 +636,39 @@ static mlir::Value genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, mlir::acc::ReductionOperator op, mlir::Type ty, mlir::Value value1, mlir::Value value2) { + + // Handle combiner on arrays. + if (auto refTy = mlir::dyn_cast(ty)) { + if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { + if (seqTy.getShape().size() > 1) + TODO(loc, "OpenACC reduction on array with more than one dimension"); + if (seqTy.hasDynamicExtents()) + TODO(loc, "OpenACC reduction on array with dynamic extents"); + mlir::Type idxTy = builder.getIndexType(); + mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); + auto lb = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, 0)); + auto ub = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, seqTy.getShape()[0] - 1)); + auto step = builder.create( + loc, idxTy, builder.getIntegerAttr(idxTy, 1)); + auto loop = builder.create(loc, lb, ub, step, + /*unordered=*/false); + builder.setInsertionPointToStart(loop.getBody()); + auto addr1 = builder.create( + loc, refTy, value1, mlir::ValueRange{loop.getInductionVar()}); + auto addr2 = builder.create( + loc, refTy, value2, mlir::ValueRange{loop.getInductionVar()}); + auto load1 = builder.create(loc, addr1); + auto load2 = builder.create(loc, addr2); + auto combined = + genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2); + builder.create(loc, combined, addr1); + builder.setInsertionPointAfter(loop); + return value1; + } + } + if (op == mlir::acc::ReductionOperator::AccAdd) { if (ty.isIntOrIndex()) return builder.create(loc, value1, value2); @@ -666,10 +743,16 @@ converter, builder, semanticsContext, stmtCtx, accObject, operandLocation, asFortran, bounds); - if 
(!fir::isa_trivial(fir::unwrapRefType(baseAddr.getType()))) + mlir::Type reductionTy = fir::unwrapRefType(baseAddr.getType()); + if (auto seqTy = mlir::dyn_cast(reductionTy)) + reductionTy = seqTy.getEleTy(); + + if (!fir::isa_trivial(reductionTy)) TODO(operandLocation, "reduction with unsupported type"); mlir::Type ty = fir::unwrapRefType(baseAddr.getType()); + if (!fir::isa_trivial(ty)) + ty = baseAddr.getType(); std::string recipeName = fir::getTypeAsString( ty, converter.getKindMap(), ("reduction_" + stringifyReductionOperator(mlirOp)).str()); diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -56,6 +56,26 @@ ! CHECK: acc.yield %[[COMBINED]] : f32 ! CHECK: } +! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_100xi32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: %[[CST:.*]] = arith.constant dense<1> : vector<100xi32> +! CHECK: acc.yield %[[CST]] : vector<100xi32> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LB:.*]] = arith.constant 0 : index +! CHECK: %[[UB:.*]] = arith.constant 99 : index +! CHECK: %[[STEP:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref +! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.muli %[[LOAD1]], %[[LOAD2]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref +! CHECK: } +! CHECK: acc.yield %[[ARG0]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_i32 : i32 reduction_operator init { ! 
CHECK: ^bb0(%{{.*}}: i32): ! CHECK: %[[INIT:.*]] = arith.constant 1 : i32 @@ -66,6 +86,26 @@ ! CHECK: acc.yield %[[COMBINED]] : i32 ! CHECK: } +! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xf32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<100xf32> +! CHECK: acc.yield %[[CST]] : vector<100xf32> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LB:.*]] = arith.constant 0 : index +! CHECK: %[[UB:.*]] = arith.constant 99 : index +! CHECK: %[[STEP:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref +! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.addf %[[LOAD1]], %[[LOAD2]] fastmath : f32 +! CHECK: fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref +! CHECK: } +! CHECK: acc.yield %[[ARG0]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_add_f32 : f32 reduction_operator init { ! CHECK: ^bb0(%{{.*}}: f32): ! CHECK: %[[INIT:.*]] = arith.constant 0.000000e+00 : f32 @@ -76,6 +116,26 @@ ! CHECK: acc.yield %[[COMBINED]] : f32 ! CHECK: } +! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xi32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<100xi32> +! CHECK: acc.yield %[[CST]] : vector<100xi32> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LB:.*]] = arith.constant 0 : index +! CHECK: %[[UB:.*]] = arith.constant 99 : index +! CHECK: %[[STEP:.*]] = arith.constant 1 : index +! 
CHECK: fir.do_loop %[[IV:.*]] = %[[LB]] to %[[UB]] step %[[STEP]] { +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref +! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD1]], %[[LOAD2]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[COORD1]] : !fir.ref +! CHECK: } +! CHECK: acc.yield %[[ARG0]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_add_i32 : i32 reduction_operator init { ! CHECK: ^bb0(%{{.*}}: i32): ! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 @@ -100,6 +160,20 @@ ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: acc.loop reduction(@reduction_add_i32 -> %[[B]] : !fir.ref) +subroutine acc_reduction_add_int_array_1d(a, b) + integer :: a(100) + integer :: i, b(100) + + !$acc loop reduction(+:b) + do i = 1, 100 + b(i) = b(i) + a(i) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_add_int_array_1d( +! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref> {fir.bindc_name = "b"}) +! CHECK: acc.loop reduction(@reduction_add_ref_100xi32 -> %[[B]] : !fir.ref>) + subroutine acc_reduction_add_float(a, b) real :: a(100), b integer :: i @@ -114,6 +188,20 @@ ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: acc.loop reduction(@reduction_add_f32 -> %[[B]] : !fir.ref) +subroutine acc_reduction_add_float_array_1d(a, b) + real :: a(100), b(100) + integer :: i + + !$acc loop reduction(+:b) + do i = 1, 100 + b(i) = b(i) + a(i) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_add_float_array_1d( +! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref> {fir.bindc_name = "b"}) +! 
CHECK: acc.loop reduction(@reduction_add_ref_100xf32 -> %[[B]] : !fir.ref>) + subroutine acc_reduction_mul_int(a, b) integer :: a(100) integer :: i, b @@ -128,6 +216,20 @@ ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: acc.loop reduction(@reduction_mul_i32 -> %[[B]] : !fir.ref) +subroutine acc_reduction_mul_int_array_1d(a, b) + integer :: a(100) + integer :: i, b(100) + + !$acc loop reduction(*:b) + do i = 1, 100 + b(i) = b(i) * a(i) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_mul_int_array_1d( +! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref> {fir.bindc_name = "b"}) +! CHECK: acc.loop reduction(@reduction_mul_ref_100xi32 -> %[[B]] : !fir.ref>) + subroutine acc_reduction_mul_float(a, b) real :: a(100), b integer :: i @@ -142,6 +244,19 @@ ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: acc.loop reduction(@reduction_mul_f32 -> %[[B]] : !fir.ref) +subroutine acc_reduction_mul_float_array_1d(a, b) + real :: a(100), b(100) + integer :: i + + !$acc loop reduction(*:b) + do i = 1, 100 + b(i) = b(i) * a(i) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_mul_float_array_1d( +! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref> {fir.bindc_name = "b"}) +! 
CHECK: acc.loop reduction(@reduction_mul_ref_100xf32 -> %[[B]] : !fir.ref>) subroutine acc_reduction_min_int(a, b) integer :: a(100) diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -436,7 +436,7 @@ LogicalResult acc::ReductionRecipeOp::verifyRegions() { if (failed(verifyInitLikeSingleArgRegion(*this, getInitRegion(), "reduction", "init", getType(), - /*verifyYield=*/true))) + /*verifyYield=*/false))) return failure(); if (getCombinerRegion().empty()) diff --git a/mlir/test/Dialect/OpenACC/invalid.mlir b/mlir/test/Dialect/OpenACC/invalid.mlir --- a/mlir/test/Dialect/OpenACC/invalid.mlir +++ b/mlir/test/Dialect/OpenACC/invalid.mlir @@ -418,15 +418,6 @@ // ----- -// expected-error@+1 {{expects init region to yield a value of the reduction type}} -acc.reduction.recipe @reduction_i64 : i64 reduction_operator init { -^bb0(%0: i64): - %1 = arith.constant 0 : i32 - acc.yield %1 : i32 -} combiner {} - -// ----- - // expected-error@+1 {{expects non-empty combiner region}} acc.reduction.recipe @reduction_i64 : i64 reduction_operator init { ^bb0(%0: i64):