diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -535,39 +535,40 @@ llvm_unreachable("unexpected reduction operator"); } -/// Get the correct DenseElementsAttr attribute for the given init value. -/// The verifier on the DenseElementsAttr is strict about the init value passed -/// to it so it must matched the type. -static mlir::DenseElementsAttr getDenseAttr(mlir::ShapedType shTy, - int64_t value) { - if (shTy.getElementType().isIntOrIndex()) { - if (auto intTy = mlir::dyn_cast(shTy.getElementType())) { - if (intTy.getIntOrFloatBitWidth() == 8) - return mlir::DenseElementsAttr::get(shTy, static_cast(value)); - if (intTy.getIntOrFloatBitWidth() == 16) - return mlir::DenseElementsAttr::get(shTy, static_cast(value)); - if (intTy.getIntOrFloatBitWidth() == 32) - return mlir::DenseElementsAttr::get(shTy, static_cast(value)); - if (intTy.getIntOrFloatBitWidth() == 64) - return mlir::DenseElementsAttr::get(shTy, value); +/// Get the initial value for reduction operator. +template +static R getReductionInitValue(mlir::acc::ReductionOperator op, mlir::Type ty) { + if (op == mlir::acc::ReductionOperator::AccMin) { + // min init value -> largest + if constexpr (std::is_same_v) { + assert(ty.isIntOrIndex() && "expect integer or index type"); + return llvm::APInt::getSignedMaxValue(ty.getIntOrFloatBitWidth()); + } + if constexpr (std::is_same_v) { + auto floatTy = mlir::dyn_cast_or_null(ty); + assert(floatTy && "expect float type"); + return llvm::APFloat::getLargest(floatTy.getFloatSemantics(), + /*negative=*/false); + } + } else { + // +, ior, ieor init value -> 0 + // * init value -> 1 + int64_t value = (op == mlir::acc::ReductionOperator::AccMul) ? 1 : 0; + if constexpr (std::is_same_v) { + assert(ty.isIntOrIndex() && "expect integer or index type"); + return llvm::APInt(ty.getIntOrFloatBitWidth(), value, true); } - } - if (mlir::isa(shTy.getElementType())) { - if (auto intTy = mlir::dyn_cast(shTy.getElementType())) { - if (intTy.getIntOrFloatBitWidth() == 16) - return mlir::DenseElementsAttr::get(shTy, static_cast(value)); - if (intTy.getIntOrFloatBitWidth() == 32) - return mlir::DenseElementsAttr::get(shTy, static_cast(value)); - if (intTy.getIntOrFloatBitWidth() == 64) - return mlir::DenseElementsAttr::get(shTy, static_cast(value)); - if (intTy.getIntOrFloatBitWidth() == 128) - return mlir::DenseElementsAttr::get(shTy, - static_cast(value)); + if constexpr (std::is_same_v) { + assert(mlir::isa(ty) && "expect float type"); + auto floatTy = mlir::dyn_cast(ty); + return llvm::APFloat(floatTy.getFloatSemantics(), value); } - } - llvm_unreachable("unsupported dense attribute type"); + if constexpr (std::is_same_v) + return value; + } + llvm_unreachable("OpenACC reduction unsupported type"); } static mlir::Value genReductionInitValue(fir::FirOpBuilder &builder, @@ -581,19 +582,34 @@ // min -> largest if (op == mlir::acc::ReductionOperator::AccMin) { - if (ty.isIntOrIndex()) { - unsigned bits = ty.getIntOrFloatBitWidth(); + if (ty.isIntOrIndex()) return builder.create( loc, ty, - builder.getIntegerAttr( - ty, llvm::APInt::getSignedMaxValue(bits).getSExtValue())); - } - if (auto floatTy = mlir::dyn_cast_or_null(ty)) { - const llvm::fltSemantics &sem = floatTy.getFloatSemantics(); + builder.getIntegerAttr(ty, + getReductionInitValue(op, ty))); + if (auto floatTy = mlir::dyn_cast_or_null(ty)) return builder.create( loc, ty, - builder.getFloatAttr( - ty, llvm::APFloat::getLargest(sem, /*negative=*/false))); + builder.getFloatAttr(ty, + getReductionInitValue(op, ty))); + if (auto refTy = mlir::dyn_cast(ty)) { + if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { + mlir::Type vecTy = + mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy()); + auto shTy = vecTy.cast(); + if (seqTy.getEleTy().isIntOrIndex()) + return builder.create( + loc, vecTy, + mlir::DenseElementsAttr::get( + shTy, + getReductionInitValue(op, seqTy.getEleTy()))); + if (mlir::isa(seqTy.getEleTy())) + return builder.create( + loc, vecTy, + mlir::DenseElementsAttr::get( + shTy, + getReductionInitValue(op, seqTy.getEleTy()))); + } } // max -> least } else if (op == mlir::acc::ReductionOperator::AccMax) { @@ -610,22 +626,31 @@ ty, llvm::APFloat::getSmallest(floatTy.getFloatSemantics(), /*negative=*/true))); } else { - // 0 for +, ior, ieor - // 1 for * - int64_t initValue = op == mlir::acc::ReductionOperator::AccMul ? 1 : 0; if (ty.isIntOrIndex()) return builder.create( - loc, ty, builder.getIntegerAttr(ty, initValue)); + loc, ty, + builder.getIntegerAttr(ty, getReductionInitValue(op, ty))); if (mlir::isa(ty)) return builder.create( - loc, ty, builder.getFloatAttr(ty, initValue)); + loc, ty, + builder.getFloatAttr(ty, getReductionInitValue(op, ty))); if (auto refTy = mlir::dyn_cast(ty)) { if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { - mlir::Type vecType = + mlir::Type vecTy = mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy()); - mlir::DenseElementsAttr denseAttr = - getDenseAttr(vecType.cast(), initValue); - return builder.create(loc, vecType, denseAttr); + auto shTy = vecTy.cast(); + if (seqTy.getEleTy().isIntOrIndex()) + return builder.create( + loc, vecTy, + mlir::DenseElementsAttr::get( + shTy, + getReductionInitValue(op, seqTy.getEleTy()))); + if (mlir::isa(seqTy.getEleTy())) + return builder.create( + loc, vecTy, + mlir::DenseElementsAttr::get( + shTy, + getReductionInitValue(op, seqTy.getEleTy()))); } } } diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -24,6 +24,32 @@ ! CHECK: acc.yield %[[SELECT]] : i32 ! CHECK: } +! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100x10xf32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: %[[CST:.*]] = arith.constant dense<3.40282347E+38> : vector<100x10xf32> +! CHECK: acc.yield %[[CST]] : vector<100x10xf32> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LB0:.*]] = arith.constant 0 : index +! CHECK: %[[UB0:.*]] = arith.constant 9 : index +! CHECK: %[[STEP0:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { +! CHECK: %[[LB1:.*]] = arith.constant 0 : index +! CHECK: %[[UB1:.*]] = arith.constant 99 : index +! CHECK: %[[STEP1:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[IV1:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] { +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]], %[[IV1]] : (!fir.ref>, index, index) -> !fir.ref +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]], %[[IV1]] : (!fir.ref>, index, index) -> !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref +! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref +! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD1]], %[[LOAD2]] : f32 +! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : f32 +! CHECK: fir.store %[[SELECT]] to %[[COORD1]] : !fir.ref +! CHECK: } +! CHECK: } +! CHECK: acc.yield %[[ARG0]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_min_f32 : f32 reduction_operator init { ! CHECK: ^bb0(%{{.*}}: f32): ! CHECK: %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32 @@ -35,6 +61,27 @@ ! CHECK: acc.yield %[[SELECT]] : f32 ! CHECK: } +! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100xi32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): +! CHECK: %[[CST:.*]] = arith.constant dense<2147483647> : vector<100xi32> +! CHECK: acc.yield %[[CST]] : vector<100xi32> +! CHECK: } combiner { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LB0:.*]] = arith.constant 0 : index +! CHECK: %[[UB0:.*]] = arith.constant 99 : index +! CHECK: %[[STEP0:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[IV0:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] { +! CHECK: %[[COORD1:.*]] = fir.coordinate_of %[[ARG0]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[COORD2:.*]] = fir.coordinate_of %[[ARG1]], %[[IV0]] : (!fir.ref>, index) -> !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[COORD1]] : !fir.ref +! CHECK: %[[LOAD2:.*]] = fir.load %[[COORD2]] : !fir.ref +! CHECK: %[[CMP:.*]] = arith.cmpi slt, %[[LOAD1]], %[[LOAD2]] : i32 +! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD1]], %[[LOAD2]] : i32 +! CHECK: fir.store %[[SELECT]] to %[[COORD1]] : !fir.ref +! CHECK: } +! CHECK: acc.yield %[[ARG0]] : !fir.ref> +! CHECK: } + ! CHECK-LABEL: acc.reduction.recipe @reduction_min_i32 : i32 reduction_operator init { ! CHECK: ^bb0(%arg0: i32): ! CHECK: %[[INIT:.*]] = arith.constant 2147483647 : i32 @@ -374,6 +421,22 @@ ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} ! CHECK: acc.loop reduction(@reduction_min_i32 -> %[[RED_B]] : !fir.ref) +subroutine acc_reduction_min_int_array_1d(a, b) + integer :: a(100), b(100) + integer :: i + + !$acc loop reduction(min:b) + do i = 1, 100 + b(i) = min(b(i), a(i)) + end do +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_min_int_array_1d( +! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref> {fir.bindc_name = "b"}) +! CHECK: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref>) bounds(%2) -> !fir.ref> {name = "b"} +! CHECK: acc.loop reduction(@reduction_min_ref_100xi32 -> %[[RED_ARG1]] : !fir.ref>) + + subroutine acc_reduction_min_float(a, b) real :: a(100), b integer :: i @@ -389,6 +452,24 @@ ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} ! CHECK: acc.loop reduction(@reduction_min_f32 -> %[[RED_B]] : !fir.ref) +subroutine acc_reduction_min_float_array2d(a, b) + real :: a(100, 10), b(100, 10) + integer :: i, j + + !$acc loop reduction(min:b) collapse(2) + do i = 1, 100 + do j = 1, 10 + b(i, j) = min(b(i, j), a(i, j)) + end do + end do +end subroutine + +! CHECK-LABEL: func.func @_QPacc_reduction_min_float_array2d( +! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[ARG1:.*]]: !fir.ref> {fir.bindc_name = "b"}) +! CHECK: %[[RED_ARG1:.*]] = acc.reduction varPtr(%[[ARG1]] : !fir.ref>) bounds(%3, %5) -> !fir.ref> {name = "b"} +! CHECK: acc.loop reduction(@reduction_min_ref_100x10xf32 -> %[[RED_ARG1]] : !fir.ref>) +! CHECK: attributes {collapse = 2 : i64} + subroutine acc_reduction_max_int(a, b) integer :: a(100) integer :: i, b