diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -694,10 +694,11 @@ return false; } -static mlir::Value -genReductionInitValue(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::Type ty, mlir::acc::ReductionOperator op, - llvm::SmallVector &bounds) { +/// Return a constant with the initial value for the reduction operator and +/// type combination. +static mlir::Value getReductionInitValue(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type ty, + mlir::acc::ReductionOperator op) { if (op == mlir::acc::ReductionOperator::AccLand || op == mlir::acc::ReductionOperator::AccLor || op == mlir::acc::ReductionOperator::AccEqv || @@ -736,26 +737,48 @@ return fir::factory::Complex{builder, loc}.createComplex( cmplxTy.getFKind(), realInit, imagInit); } - if (auto refTy = mlir::dyn_cast(ty)) { - if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { - mlir::Type vecTy = - mlir::VectorType::get(seqTy.getShape(), seqTy.getEleTy()); - auto shTy = vecTy.cast(); - if (seqTy.getEleTy().isIntOrIndex()) - return builder.create( - loc, vecTy, - mlir::DenseElementsAttr::get( - shTy, - getReductionInitValue(op, seqTy.getEleTy()))); - if (mlir::isa(seqTy.getEleTy())) - return builder.create( - loc, vecTy, - mlir::DenseElementsAttr::get( - shTy, - getReductionInitValue(op, seqTy.getEleTy()))); + + if (auto seqTy = mlir::dyn_cast(ty)) + return getReductionInitValue(builder, loc, seqTy.getEleTy(), op); + + llvm::report_fatal_error("Unsupported OpenACC reduction type"); +} + +static mlir::Value genReductionInitRegion(fir::FirOpBuilder &builder, + mlir::Location loc, mlir::Type ty, + mlir::acc::ReductionOperator op) { + ty = fir::unwrapRefType(ty); + mlir::Value initValue = getReductionInitValue(builder, loc, ty, op); + if (fir::isa_trivial(ty)) { + mlir::Value alloca = builder.create(loc, ty); + builder.create(loc, builder.createConvert(loc, ty, initValue), + alloca); + return alloca; + } else if (auto seqTy = mlir::dyn_cast_or_null(ty)) { + if (seqTy.hasDynamicExtents()) + TODO(loc, "private recipe of array with dynamic extents"); + if (fir::isa_trivial(seqTy.getEleTy())) { + mlir::Value alloca = builder.create(loc, seqTy); + mlir::Type idxTy = builder.getIndexType(); + mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); + llvm::SmallVector loops; + llvm::SmallVector ivs; + for (auto ext : llvm::reverse(seqTy.getShape())) { + auto lb = builder.createIntegerConstant(loc, idxTy, 0); + auto ub = builder.createIntegerConstant(loc, idxTy, ext - 1); + auto step = builder.createIntegerConstant(loc, idxTy, 1); + auto loop = builder.create(loc, lb, ub, step, + /*unordered=*/false); + builder.setInsertionPointToStart(loop.getBody()); + loops.push_back(loop); + ivs.push_back(loop.getInductionVar()); + } + auto coord = builder.create(loc, refTy, alloca, ivs); + builder.create(loc, initValue, coord); + builder.setInsertionPointAfter(loops[0]); + return alloca; } } - llvm::report_fatal_error("Unsupported OpenACC reduction type"); } @@ -766,8 +789,16 @@ mlir::Type i1 = builder.getI1Type(); mlir::Value v1 = builder.create(loc, i1, value1); mlir::Value v2 = builder.create(loc, i1, value2); - mlir::Value add = builder.create(loc, v1, v2); - return builder.create(loc, value1.getType(), add); + mlir::Value combined = builder.create(loc, v1, v2); + return builder.create(loc, value1.getType(), combined); +} + +static mlir::Value loadIfRef(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value value) { + if (mlir::isa( + value.getType())) + return builder.create(loc, value); + return value; } static mlir::Value genComparisonCombiner(fir::FirOpBuilder &builder, @@ -782,45 +813,13 @@ return builder.create(loc, value1.getType(), add); } -static mlir::Value genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, - mlir::acc::ReductionOperator op, mlir::Type ty, - mlir::Value value1, mlir::Value value2) { - - // Handle combiner on arrays. - if (auto refTy = mlir::dyn_cast(ty)) { - if (auto seqTy = mlir::dyn_cast(refTy.getEleTy())) { - if (seqTy.hasDynamicExtents()) - TODO(loc, "OpenACC reduction on array with dynamic extents"); - mlir::Type idxTy = builder.getIndexType(); - mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); - - llvm::SmallVector loops; - llvm::SmallVector ivs; - for (auto ext : llvm::reverse(seqTy.getShape())) { - auto lb = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, 0)); - auto ub = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, ext - 1)); - auto step = builder.create( - loc, idxTy, builder.getIntegerAttr(idxTy, 1)); - auto loop = builder.create(loc, lb, ub, step, - /*unordered=*/false); - builder.setInsertionPointToStart(loop.getBody()); - loops.push_back(loop); - ivs.push_back(loop.getInductionVar()); - } - auto addr1 = builder.create(loc, refTy, value1, ivs); - auto addr2 = builder.create(loc, refTy, value2, ivs); - auto load1 = builder.create(loc, addr1); - auto load2 = builder.create(loc, addr2); - auto combined = - genCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2); - builder.create(loc, combined, addr1); - builder.setInsertionPointAfter(loops[0]); - return value1; - } - } - +static mlir::Value genScalarCombiner(fir::FirOpBuilder &builder, + mlir::Location loc, + mlir::acc::ReductionOperator op, + mlir::Type ty, mlir::Value value1, + mlir::Value value2) { + value1 = loadIfRef(builder, loc, value1); + value2 = loadIfRef(builder, loc, value2); if (op == mlir::acc::ReductionOperator::AccAdd) { if (ty.isIntOrIndex()) return builder.create(loc, value1, value2); @@ -874,6 +873,43 @@ TODO(loc, "reduction operator"); } +static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::acc::ReductionOperator op, mlir::Type ty, + mlir::Value value1, mlir::Value value2) { + ty = fir::unwrapRefType(ty); + + if (auto seqTy = mlir::dyn_cast(ty)) { + if (seqTy.hasDynamicExtents()) + TODO(loc, "OpenACC reduction on array with dynamic extents"); + mlir::Type idxTy = builder.getIndexType(); + mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); + + llvm::SmallVector loops; + llvm::SmallVector ivs; + for (auto ext : llvm::reverse(seqTy.getShape())) { + auto lb = builder.createIntegerConstant(loc, idxTy, 0); + auto ub = builder.createIntegerConstant(loc, idxTy, ext - 1); + auto step = builder.createIntegerConstant(loc, idxTy, 1); + auto loop = builder.create(loc, lb, ub, step, + /*unordered=*/false); + builder.setInsertionPointToStart(loop.getBody()); + loops.push_back(loop); + ivs.push_back(loop.getInductionVar()); + } + auto addr1 = builder.create(loc, refTy, value1, ivs); + auto addr2 = builder.create(loc, refTy, value2, ivs); + auto load1 = builder.create(loc, addr1); + auto load2 = builder.create(loc, addr2); + mlir::Value res = + genScalarCombiner(builder, loc, op, seqTy.getEleTy(), load1, load2); + builder.create(loc, res, addr1); + builder.setInsertionPointAfter(loops[0]); + } else { + mlir::Value res = genScalarCombiner(builder, loc, op, ty, value1, value2); + builder.create(loc, res, value1); + } +} + mlir::acc::ReductionRecipeOp Fortran::lower::createOrGetReductionRecipe( fir::FirOpBuilder &builder, llvm::StringRef recipeName, mlir::Location loc, mlir::Type ty, mlir::acc::ReductionOperator op, @@ -890,7 +926,7 @@ builder.createBlock(&recipe.getInitRegion(), recipe.getInitRegion().end(), {ty}, {loc}); builder.setInsertionPointToEnd(&recipe.getInitRegion().back()); - mlir::Value initValue = genReductionInitValue(builder, loc, ty, op, bounds); + mlir::Value initValue = genReductionInitRegion(builder, loc, ty, op); builder.create(loc, initValue); builder.createBlock(&recipe.getCombinerRegion(), @@ -898,8 +934,8 @@ builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back()); mlir::Value v1 = recipe.getCombinerRegion().front().getArgument(0); mlir::Value v2 = recipe.getCombinerRegion().front().getArgument(1); - mlir::Value combinedValue = genCombiner(builder, loc, op, ty, v1, v2); - builder.create(loc, combinedValue); + genCombiner(builder, loc, op, ty, v1, v2); + builder.create(loc, v1); builder.restoreInsertionPoint(crtPos); return recipe; } @@ -941,9 +977,7 @@ auto op = createDataEntryOp( builder, operandLocation, baseAddr, asFortran, bounds, /*structured=*/true, mlir::acc::DataClause::acc_reduction, retTy); - mlir::Type ty = fir::unwrapRefType(op.getAccPtr().getType()); - if (!fir::isa_trivial(ty)) - ty = retTy; + mlir::Type ty = op.getAccPtr().getType(); std::string recipeName = fir::getTypeAsString( ty, converter.getKindMap(), ("reduction_" + stringifyReductionOperator(mlirOp)).str()); diff --git a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 b/flang/test/Lower/OpenACC/acc-kernels-loop.f90 --- a/flang/test/Lower/OpenACC/acc-kernels-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-kernels-loop.f90 @@ -720,7 +720,7 @@ end do ! CHECK: acc.kernels { -! CHECK: acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { ! CHECK: fir.do_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90 --- a/flang/test/Lower/OpenACC/acc-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-loop.f90 @@ -279,7 +279,7 @@ reduction_i = 1 end do -! CHECK: acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { ! CHECK: fir.do_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 --- a/flang/test/Lower/OpenACC/acc-parallel-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel-loop.f90 @@ -760,8 +760,8 @@ reduction_i = 1 end do -! CHECK: acc.parallel reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { -! CHECK: acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.parallel reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { ! CHECK: fir.do_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-parallel.f90 b/flang/test/Lower/OpenACC/acc-parallel.f90 --- a/flang/test/Lower/OpenACC/acc-parallel.f90 +++ b/flang/test/Lower/OpenACC/acc-parallel.f90 @@ -326,7 +326,7 @@ !$acc parallel reduction(+:reduction_r) reduction(*:reduction_i) !$acc end parallel -! CHECK: acc.parallel reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.parallel reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-reduction.f90 b/flang/test/Lower/OpenACC/acc-reduction.f90 --- a/flang/test/Lower/OpenACC/acc-reduction.f90 +++ b/flang/test/Lower/OpenACC/acc-reduction.f90 @@ -4,8 +4,8 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_10xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<10xi32> -! CHECK: acc.yield %[[CST]] : vector<10xi32> + + ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB:.*]] = arith.constant 0 : index @@ -22,120 +22,170 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_mul_z32 : !fir.complex<4> reduction_operator init { -! CHECK: ^bb0(%{{.*}}: !fir.complex<4>): +! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_z32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[REAL:.*]] = arith.constant 1.000000e+00 : f32 ! CHECK: %[[IMAG:.*]] = arith.constant 0.000000e+00 : f32 ! CHECK: %[[UNDEF:.*]] = fir.undefined !fir.complex<4> ! CHECK: %[[UNDEF1:.*]] = fir.insert_value %[[UNDEF]], %[[REAL]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> ! CHECK: %[[UNDEF2:.*]] = fir.insert_value %[[UNDEF1]], %[[IMAG]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> -! CHECK: acc.yield %[[UNDEF2]] : !fir.complex<4> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.complex<4> +! CHECK: fir.store %[[UNDEF2]] to %[[ALLOCA]] : !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: !fir.complex<4>, %[[ARG1:.*]]: !fir.complex<4>): -! CHECK: %[[COMBINED:.*]] = fir.mulc %[[ARG0]], %[[ARG1]] : !fir.complex<4> -! CHECK: acc.yield %[[COMBINED]] : !fir.complex<4> +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref> +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref> +! CHECK: %[[COMBINED:.*]] = fir.mulc %[[LOAD0]], %[[LOAD1]] : !fir.complex<4> +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref> +! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_add_z32 : !fir.complex<4> reduction_operator init { -! CHECK: ^bb0(%{{.*}}: !fir.complex<4>): +! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_z32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[REAL:.*]] = arith.constant 0.000000e+00 : f32 ! CHECK: %[[IMAG:.*]] = arith.constant 0.000000e+00 : f32 ! CHECK: %[[UNDEF:.*]] = fir.undefined !fir.complex<4> ! CHECK: %[[UNDEF1:.*]] = fir.insert_value %[[UNDEF]], %[[REAL]], [0 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> ! CHECK: %[[UNDEF2:.*]] = fir.insert_value %[[UNDEF1]], %[[IMAG]], [1 : index] : (!fir.complex<4>, f32) -> !fir.complex<4> -! CHECK: acc.yield %[[UNDEF2]] : !fir.complex<4> +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.complex<4> +! CHECK: fir.store %[[UNDEF2]] to %[[ALLOCA]] : !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: !fir.complex<4>, %[[ARG1:.*]]: !fir.complex<4>): -! CHECK: %[[COMBINED:.*]] = fir.addc %[[ARG0]], %[[ARG1]] : !fir.complex<4> -! CHECK: acc.yield %[[COMBINED]] : !fir.complex<4> +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref> +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref> +! CHECK: %[[COMBINED:.*]] = fir.addc %[[LOAD0]], %[[LOAD1]] : !fir.complex<4> +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref> +! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_neqv_l32 : !fir.logical<4> reduction_operator init { -! CHECK: ^bb0(%{{.*}}: !fir.logical<4>): +! CHECK-LABEL: acc.reduction.recipe @reduction_neqv_ref_l32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[CST:.*]] = arith.constant false -! CHECK: acc.yield %[[CST]] : i1 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> +! CHECK: %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -! CHECK: %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -! CHECK: %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -! CHECK: %[[NEQV:.*]] = arith.cmpi ne, %[[V1]], %[[V2]] : i1 -! CHECK: %[[CONV:.*]] = fir.convert %[[NEQV]] : (i1) -> !fir.logical<4> -! CHECK: acc.yield %[[CONV]] : !fir.logical<4> +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref> +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref> +! CHECK: %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CMP:.*]] = arith.cmpi ne, %[[CONV0]], %[[CONV1]] : i1 +! CHECK: %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref> +! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_eqv_l32 : !fir.logical<4> reduction_operator init { -! CHECK: ^bb0(%{{.*}}: !fir.logical<4>): +! CHECK-LABEL: acc.reduction.recipe @reduction_eqv_ref_l32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[CST:.*]] = arith.constant true -! CHECK: acc.yield %[[CST]] : i1 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> +! CHECK: %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -! CHECK: %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -! CHECK: %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -! CHECK: %[[EQV:.*]] = arith.cmpi eq, %[[V1]], %[[V2]] : i1 -! CHECK: %[[CONV:.*]] = fir.convert %[[EQV]] : (i1) -> !fir.logical<4> -! CHECK: acc.yield %[[CONV]] : !fir.logical<4> +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref> +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref> +! CHECK: %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CMP:.*]] = arith.cmpi eq, %[[CONV0]], %[[CONV1]] : i1 +! CHECK: %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref> +! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_lor_l32 : !fir.logical<4> reduction_operator init { -! CHECK: ^bb0(%{{.*}}: !fir.logical<4>): +! CHECK-LABEL: acc.reduction.recipe @reduction_lor_ref_l32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[CST:.*]] = arith.constant false -! CHECK: acc.yield %[[CST]] : i1 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> +! CHECK: %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -! CHECK: %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -! CHECK: %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -! CHECK: %[[AND:.*]] = arith.ori %[[V1]], %[[V2]] : i1 -! CHECK: %[[CONV:.*]] = fir.convert %[[AND]] : (i1) -> !fir.logical<4> -! CHECK: acc.yield %[[CONV]] : !fir.logical<4> +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref> +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref> +! CHECK: %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CMP:.*]] = arith.ori %[[CONV0]], %[[CONV1]] : i1 +! CHECK: %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref> +! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_land_l32 : !fir.logical<4> reduction_operator init { -! CHECK: ^bb0(%{{.*}}: !fir.logical<4>): +! CHECK-LABEL: acc.reduction.recipe @reduction_land_ref_l32 : !fir.ref> reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref>): ! CHECK: %[[CST:.*]] = arith.constant true -! CHECK: acc.yield %[[CST]] : i1 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.logical<4> +! CHECK: %[[CONVERT:.*]] = fir.convert %[[CST]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CONVERT]] to %[[ALLOCA]] : !fir.ref> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: !fir.logical<4>, %[[ARG1:.*]]: !fir.logical<4>): -! CHECK: %[[V1:.*]] = fir.convert %[[ARG0]] : (!fir.logical<4>) -> i1 -! CHECK: %[[V2:.*]] = fir.convert %[[ARG1]] : (!fir.logical<4>) -> i1 -! CHECK: %[[AND:.*]] = arith.andi %[[V1]], %[[V2]] : i1 -! CHECK: %[[CONV:.*]] = fir.convert %[[AND]] : (i1) -> !fir.logical<4> -! CHECK: acc.yield %[[CONV]] : !fir.logical<4> +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref> +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref> +! CHECK: %[[CONV0:.*]] = fir.convert %[[LOAD0]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CONV1:.*]] = fir.convert %[[LOAD1]] : (!fir.logical<4>) -> i1 +! CHECK: %[[CMP:.*]] = arith.andi %[[CONV0]], %[[CONV1]] : i1 +! CHECK: %[[CMP_CONV:.*]] = fir.convert %[[CMP]] : (i1) -> !fir.logical<4> +! CHECK: fir.store %[[CMP_CONV]] to %[[ARG0]] : !fir.ref> +! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_xor_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_xor_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[CST:.*]] = arith.constant 0 : i32 -! CHECK: acc.yield %[[CST]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[COMBINED:.*]] = arith.xori %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[COMBINED]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.xori %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_ior_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_ior_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[CST:.*]] = arith.constant 0 : i32 -! CHECK: acc.yield %[[CST]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[COMBINED:.*]] = arith.ori %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[COMBINED]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.ori %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_iand_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_iand_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[CST:.*]] = arith.constant -1 : i32 -! CHECK: acc.yield %[[CST]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[CST]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[COMBINED:.*]] = arith.andi %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[COMBINED]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.andi %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_100xf32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<-1.401300e-45> : vector<100xf32> -! CHECK: acc.yield %[[CST]] : vector<100xf32> +! CHECK: %[[INIT:.*]] = arith.constant -1.401300e-45 : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB0:.*]] = arith.constant 0 : index @@ -153,21 +203,27 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_max_f32 : f32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: f32): +! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_f32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant -1.401300e-45 : f32 -! CHECK: acc.yield %[[INIT]] : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca f32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -! CHECK: %[[CMP:.*]] = arith.cmpf ogt, %[[ARG0]], %[[ARG1]] : f32 -! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : f32 -! CHECK: acc.yield %[[SELECT]] : f32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[CMP:.*]] = arith.cmpf ogt, %[[LOAD0]], %[[LOAD1]] : f32 +! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : f32 +! CHECK: fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_100x10xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%arg0: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<-2147483648> : vector<100x10xi32> -! CHECK: acc.yield %[[CST]] : vector<100x10xi32> +! CHECK: %[[INIT:.*]] = arith.constant -2147483648 : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB0:.*]] = arith.constant 0 : index @@ -190,21 +246,27 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_max_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%arg0: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_max_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%arg0: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant -2147483648 : i32 -! CHECK: acc.yield %[[INIT]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[ARG0]], %[[ARG1]] : i32 -! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[SELECT]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100x10xf32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<3.40282347E+38> : vector<100x10xf32> -! CHECK: acc.yield %[[CST]] : vector<100x10xf32> +! CHECK: %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xf32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB0:.*]] = arith.constant 0 : index @@ -227,21 +289,27 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_min_f32 : f32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: f32): +! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_f32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant 3.40282347E+38 : f32 -! CHECK: acc.yield %[[INIT]] : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca f32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[ARG0]], %[[ARG1]] : f32 -! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : f32 -! CHECK: acc.yield %[[SELECT]] : f32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[CMP:.*]] = arith.cmpf olt, %[[LOAD0]], %[[LOAD1]] : f32 +! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : f32 +! CHECK: fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_100xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<2147483647> : vector<100xi32> -! CHECK: acc.yield %[[CST]] : vector<100xi32> +! CHECK: %[[INIT:.*]] = arith.constant 2147483647 : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB0:.*]] = arith.constant 0 : index @@ -259,31 +327,42 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_min_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%arg0: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_min_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant 2147483647 : i32 -! CHECK: acc.yield %[[INIT]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[CMP:.*]] = arith.cmpi slt, %[[ARG0]], %[[ARG1]] : i32 -! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[SELECT]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[CMP:.*]] = arith.cmpi slt, %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: %[[SELECT:.*]] = arith.select %[[CMP]], %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[SELECT]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_mul_f32 : f32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: f32): +! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_f32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant 1.000000e+00 : f32 -! CHECK: acc.yield %[[INIT]] : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca f32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -! CHECK: %[[COMBINED:.*]] = arith.mulf %[[ARG0]], %[[ARG1]] {{.*}} : f32 -! CHECK: acc.yield %[[COMBINED]] : f32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.mulf %[[LOAD0]], %[[LOAD1]] fastmath : f32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_100xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<1> : vector<100xi32> -! CHECK: acc.yield %[[CST]] : vector<100xi32> +! CHECK: %[[INIT:.*]] = arith.constant 1 : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB:.*]] = arith.constant 0 : index @@ -300,20 +379,26 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_mul_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_mul_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant 1 : i32 -! CHECK: acc.yield %[[INIT]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[COMBINED:.*]] = arith.muli %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[COMBINED]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.muli %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xf32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<100xf32> -! CHECK: acc.yield %[[CST]] : vector<100xf32> +! CHECK: %[[INIT:.*]] = arith.constant 0.000000e+00 : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xf32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB:.*]] = arith.constant 0 : index @@ -330,20 +415,26 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_add_f32 : f32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: f32): +! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_f32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant 0.000000e+00 : f32 -! CHECK: acc.yield %[[INIT]] : f32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca f32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: f32, %[[ARG1:.*]]: f32): -! CHECK: %[[COMBINED:.*]] = arith.addf %[[ARG0]], %[[ARG1]] {{.*}} : f32 -! CHECK: acc.yield %[[COMBINED]] : f32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.addf %[[LOAD0]], %[[LOAD1]] fastmath : f32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100x10x2xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<100x10x2xi32> -! CHECK: acc.yield %[[CST]] : vector<100x10x2xi32> +! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10x2xi32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB0:.*]] = arith.constant 0 : index @@ -372,8 +463,9 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100x10xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<100x10xi32> -! CHECK: acc.yield %[[CST]] : vector<100x10xi32> +! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100x10xi32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB0:.*]] = arith.constant 0 : index @@ -397,8 +489,9 @@ ! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_100xi32 : !fir.ref> reduction_operator init { ! CHECK: ^bb0(%{{.*}}: !fir.ref>): -! CHECK: %[[CST:.*]] = arith.constant dense<0> : vector<100xi32> -! CHECK: acc.yield %[[CST]] : vector<100xi32> +! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.array<100xi32> +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref> ! CHECK: } combiner { ! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref>, %[[ARG1:.*]]: !fir.ref>): ! CHECK: %[[LB:.*]] = arith.constant 0 : index @@ -415,14 +508,19 @@ ! CHECK: acc.yield %[[ARG0]] : !fir.ref> ! CHECK: } -! CHECK-LABEL: acc.reduction.recipe @reduction_add_i32 : i32 reduction_operator init { -! CHECK: ^bb0(%{{.*}}: i32): +! CHECK-LABEL: acc.reduction.recipe @reduction_add_ref_i32 : !fir.ref reduction_operator init { +! CHECK: ^bb0(%{{.*}}: !fir.ref): ! CHECK: %[[INIT:.*]] = arith.constant 0 : i32 -! CHECK: acc.yield %[[INIT]] : i32 +! CHECK: %[[ALLOCA:.*]] = fir.alloca i32 +! CHECK: fir.store %[[INIT]] to %[[ALLOCA]] : !fir.ref +! CHECK: acc.yield %[[ALLOCA]] : !fir.ref ! CHECK: } combiner { -! CHECK: ^bb0(%[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32): -! CHECK: %[[COMBINED:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i32 -! CHECK: acc.yield %[[COMBINED]] : i32 +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.ref, %[[ARG1:.*]]: !fir.ref): +! CHECK: %[[LOAD0:.*]] = fir.load %[[ARG0]] : !fir.ref +! CHECK: %[[LOAD1:.*]] = fir.load %[[ARG1]] : !fir.ref +! CHECK: %[[COMBINED:.*]] = arith.addi %[[LOAD0]], %[[LOAD1]] : i32 +! CHECK: fir.store %[[COMBINED]] to %[[ARG0]] : !fir.ref +! CHECK: acc.yield %[[ARG0]] : !fir.ref ! CHECK: } subroutine acc_reduction_add_int(a, b) @@ -438,7 +536,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_add_int( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_add_i32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_add_ref_i32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_add_int_array_1d(a, b) integer :: a(100) @@ -506,7 +604,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_add_float( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_add_f32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_add_float_array_1d(a, b) real :: a(100), b(100) @@ -536,7 +634,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_mul_int( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_mul_i32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_mul_ref_i32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_mul_int_array_1d(a, b) integer :: a(100) @@ -566,7 +664,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_mul_float( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_mul_f32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_mul_ref_f32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_mul_float_array_1d(a, b) real :: a(100), b(100) @@ -596,7 +694,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_min_int( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_min_i32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_min_ref_i32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_min_int_array_1d(a, b) integer :: a(100), b(100) @@ -626,7 +724,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_min_float( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_min_f32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_min_ref_f32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_min_float_array2d(a, b) real :: a(100, 10), b(100, 10) @@ -659,7 +757,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_max_int( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECL: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_max_i32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_max_ref_i32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_max_int_array2d(a, b) integer :: a(100, 10), b(100, 10) @@ -691,7 +789,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_max_float( ! CHECK-SAME: %{{.*}}: !fir.ref> {fir.bindc_name = "a"}, %[[B:.*]]: !fir.ref {fir.bindc_name = "b"}) ! CHECK: %[[RED_B:.*]] = acc.reduction varPtr(%[[B]] : !fir.ref) -> !fir.ref {name = "b"} -! CHECK: acc.loop reduction(@reduction_max_f32 -> %[[RED_B]] : !fir.ref) +! CHECK: acc.loop reduction(@reduction_max_ref_f32 -> %[[RED_B]] : !fir.ref) subroutine acc_reduction_max_float_array1d(a, b) real :: a(100), b(100) @@ -716,7 +814,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_iand() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} -! CHECK: acc.parallel reduction(@reduction_iand_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.parallel reduction(@reduction_iand_ref_i32 -> %[[RED]] : !fir.ref) subroutine acc_reduction_ior() integer :: i @@ -726,7 +824,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_ior() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} -! CHECK: acc.parallel reduction(@reduction_ior_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.parallel reduction(@reduction_ior_ref_i32 -> %[[RED]] : !fir.ref) subroutine acc_reduction_ieor() integer :: i @@ -736,7 +834,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_ieor() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref) -> !fir.ref {name = "i"} -! CHECK: acc.parallel reduction(@reduction_xor_i32 -> %[[RED]] : !fir.ref) +! CHECK: acc.parallel reduction(@reduction_xor_ref_i32 -> %[[RED]] : !fir.ref) subroutine acc_reduction_and() logical :: l @@ -746,7 +844,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_and() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%0 : !fir.ref>) -> !fir.ref> {name = "l"} -! CHECK: acc.parallel reduction(@reduction_land_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.parallel reduction(@reduction_land_ref_l32 -> %[[RED]] : !fir.ref>) subroutine acc_reduction_or() logical :: l @@ -756,7 +854,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_or() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} -! CHECK: acc.parallel reduction(@reduction_lor_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.parallel reduction(@reduction_lor_ref_l32 -> %[[RED]] : !fir.ref>) subroutine acc_reduction_eqv() logical :: l @@ -766,7 +864,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_eqv() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} -! CHECK: acc.parallel reduction(@reduction_eqv_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.parallel reduction(@reduction_eqv_ref_l32 -> %[[RED]] : !fir.ref>) subroutine acc_reduction_neqv() logical :: l @@ -776,7 +874,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_neqv() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "l"} -! CHECK: acc.parallel reduction(@reduction_neqv_l32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.parallel reduction(@reduction_neqv_ref_l32 -> %[[RED]] : !fir.ref>) subroutine acc_reduction_add_cmplx() complex :: c @@ -786,7 +884,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_add_cmplx() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "c"} -! CHECK: acc.parallel reduction(@reduction_add_z32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.parallel reduction(@reduction_add_ref_z32 -> %[[RED]] : !fir.ref>) subroutine acc_reduction_mul_cmplx() complex :: c @@ -796,7 +894,7 @@ ! CHECK-LABEL: func.func @_QPacc_reduction_mul_cmplx() ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "c"} -! CHECK: acc.parallel reduction(@reduction_mul_z32 -> %[[RED]] : !fir.ref>) +! CHECK: acc.parallel reduction(@reduction_mul_ref_z32 -> %[[RED]] : !fir.ref>) subroutine acc_reduction_add_alloc() integer, allocatable :: i @@ -810,7 +908,7 @@ ! CHECK: %[[LOAD:.*]] = fir.load %[[ALLOCA]] : !fir.ref>> ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>) -> !fir.heap ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.heap) -> !fir.heap {name = "i"} -! CHECK: acc.parallel reduction(@reduction_add_i32 -> %[[RED]] : !fir.heap) +! CHECK: acc.parallel reduction(@reduction_add_heap_i32 -> %[[RED]] : !fir.heap) subroutine acc_reduction_add_pointer(i) integer, pointer :: i @@ -823,7 +921,7 @@ ! CHECK: %[[LOAD:.*]] = fir.load %[[ARG0]] : !fir.ref>> ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box>) -> !fir.ptr ! CHECK: %[[RED:.*]] = acc.reduction varPtr(%[[BOX_ADDR]] : !fir.ptr) -> !fir.ptr {name = "i"} -! CHECK: acc.parallel reduction(@reduction_add_i32 -> %[[RED]] : !fir.ptr) +! CHECK: acc.parallel reduction(@reduction_add_ptr_i32 -> %[[RED]] : !fir.ptr) subroutine acc_reduction_add_static_slice(a) integer :: a(100) diff --git a/flang/test/Lower/OpenACC/acc-serial-loop.f90 b/flang/test/Lower/OpenACC/acc-serial-loop.f90 --- a/flang/test/Lower/OpenACC/acc-serial-loop.f90 +++ b/flang/test/Lower/OpenACC/acc-serial-loop.f90 @@ -667,8 +667,8 @@ reduction_i = 1 end do -! CHECK: acc.serial reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { -! CHECK: acc.loop reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.serial reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.loop reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { ! CHECK: fir.do_loop ! CHECK: acc.yield ! CHECK-NEXT: }{{$}} diff --git a/flang/test/Lower/OpenACC/acc-serial.f90 b/flang/test/Lower/OpenACC/acc-serial.f90 --- a/flang/test/Lower/OpenACC/acc-serial.f90 +++ b/flang/test/Lower/OpenACC/acc-serial.f90 @@ -262,7 +262,7 @@ !$acc serial reduction(+:reduction_r) reduction(*:reduction_i) !$acc end serial -! CHECK: acc.serial reduction(@reduction_add_f32 -> %{{.*}} : !fir.ref, @reduction_mul_i32 -> %{{.*}} : !fir.ref) { +! CHECK: acc.serial reduction(@reduction_add_ref_f32 -> %{{.*}} : !fir.ref, @reduction_mul_ref_i32 -> %{{.*}} : !fir.ref) { ! CHECK: acc.yield ! CHECK-NEXT: }{{$}}