diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -3515,6 +3515,57 @@
   }
 }
 
+/// Copies the final inscan reduction values to the original variables.
+/// The emitted code is the following:
+/// \code
+/// <orig_var> = buffer[num_iters-1];
+/// \endcode
+static void emitScanBasedDirectiveFinals(
+    CodeGenFunction &CGF, const OMPLoopDirective &S,
+    llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
+  llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
+      NumIteratorsGen(CGF), CGF.SizeTy, /*isSigned=*/false);
+  SmallVector<const Expr *, 4> Shareds;
+  SmallVector<const Expr *, 4> LHSs;
+  SmallVector<const Expr *, 4> RHSs;
+  SmallVector<const Expr *, 4> Privates;
+  SmallVector<const Expr *, 4> CopyOps;
+  SmallVector<const Expr *, 4> CopyArrayElems;
+  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
+    assert(C->getModifier() == OMPC_REDUCTION_inscan &&
+           "Only inscan reductions are expected.");
+    Shareds.append(C->varlist_begin(), C->varlist_end());
+    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
+    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
+    Privates.append(C->privates().begin(), C->privates().end());
+    CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
+    CopyArrayElems.append(C->copy_array_elems().begin(),
+                          C->copy_array_elems().end());
+  }
+  // Copy the buffer element of the last iteration to the original variable:
+  // <orig_var> = buffer[num_iters-1];
+  llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
+      OMPScanNumIterations,
+      llvm::ConstantInt::get(CGF.SizeTy, 1, /*isSigned=*/false));
+  for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
+    const Expr *PrivateExpr = Privates[I];
+    const Expr *OrigExpr = Shareds[I];
+    const Expr *CopyArrayElem = CopyArrayElems[I];
+    CodeGenFunction::OpaqueValueMapping IdxMapping(
+        CGF,
+        cast<OpaqueValueExpr>(
+            cast<ArraySubscriptExpr>(CopyArrayElem)->getIdx()),
+        RValue::get(OMPLast));
+    LValue DestLVal = CGF.EmitLValue(OrigExpr);
+    LValue SrcLVal = CGF.EmitLValue(CopyArrayElem);
+    CGF.EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(CGF),
+                    SrcLVal.getAddress(CGF),
+                    cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl()),
+                    cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl()),
+                    CopyOps[I]);
+  }
+}
+
 /// Emits the code for the directive with inscan reductions.
 /// The code is the following:
 /// \code
@@ -3709,6 +3760,8 @@
     if (!isOpenMPParallelDirective(S.getDirectiveKind()))
       emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
     emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
+    if (!isOpenMPParallelDirective(S.getDirectiveKind()))
+      emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
   } else {
     CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
                                                      HasCancel);
@@ -4282,23 +4335,25 @@
     (void)emitWorksharingDirective(CGF, S, S.hasCancel());
   };
   {
-    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+      CGCapturedStmtInfo CGSI(CR_OpenMP);
+      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+      OMPLoopScope LoopScope(CGF, S);
+      return CGF.EmitScalarExpr(S.getNumIterations());
+    };
+    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                      [](const OMPReductionClause *C) {
                        return C->getModifier() == OMPC_REDUCTION_inscan;
-                     })) {
-      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
-        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
-        CGCapturedStmtInfo CGSI(CR_OpenMP);
-        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
-        OMPLoopScope LoopScope(CGF, S);
-        return CGF.EmitScalarExpr(S.getNumIterations());
-      };
+                     });
+    if (IsInscan)
       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
-    }
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
     emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                    emitEmptyBoundParameters);
+    if (IsInscan)
+      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
   }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);
@@ -4313,23 +4368,25 @@
     (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
   };
   {
-    if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
+    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
+      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
+      CGCapturedStmtInfo CGSI(CR_OpenMP);
+      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
+      OMPLoopScope LoopScope(CGF, S);
+      return CGF.EmitScalarExpr(S.getNumIterations());
+    };
+    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                      [](const OMPReductionClause *C) {
                        return C->getModifier() == OMPC_REDUCTION_inscan;
-                     })) {
-      const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
-        CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
-        CGCapturedStmtInfo CGSI(CR_OpenMP);
-        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
-        OMPLoopScope LoopScope(CGF, S);
-        return CGF.EmitScalarExpr(S.getNumIterations());
-      };
+                     });
+    if (IsInscan)
       emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
-    }
     auto LPCRegion =
         CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
     emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                    emitEmptyBoundParameters);
+    if (IsInscan)
+      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
   }
   // Check for outer lastprivate conditional update.
   checkForLastprivateConditionalUpdate(*this, S);
diff --git a/clang/test/OpenMP/parallel_for_scan_codegen.cpp b/clang/test/OpenMP/parallel_for_scan_codegen.cpp
--- a/clang/test/OpenMP/parallel_for_scan_codegen.cpp
+++ b/clang/test/OpenMP/parallel_for_scan_codegen.cpp
@@ -27,6 +27,13 @@
   // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
   // CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(
+  // CHECK: [[LAST:%.+]] = mul nsw i64 9, %
+  // CHECK: [[LAST_REF:%.+]] = getelementptr inbounds float, float* [[A_BUF]], i64 [[LAST]]
+  // CHECK: [[BC:%.+]] = bitcast float* [[LAST_REF]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast ([10 x float]* @_ZZ3baziE1a to i8*), i8* align 4 [[BC]], i64 %{{.+}}, i1 false)
+  // CHECK: [[LAST_REF_B:%.+]] = getelementptr inbounds double, double* [[B_BUF]], i64 9
+  // CHECK: [[LAST_VAL:%.+]] = load double, double* [[LAST_REF_B]],
+  // CHECK: store double [[LAST_VAL]], double* @_ZZ3baziE1b,
   // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]