diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2075,6 +2075,15 @@ if (const auto *C = D.getSingleClause()) if (C->getKind() == OMPC_ORDER_concurrent) LoopStack.setParallel(/*Enable=*/true); + if ((D.getDirectiveKind() == OMPD_simd || + (getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(D.getDirectiveKind()))) && + llvm::any_of(D.getClausesOfKind(), + [](const OMPReductionClause *C) { + return C->getModifier() == OMPC_REDUCTION_inscan; + })) + // Disable parallel access in case of prefix sum. + LoopStack.setParallel(/*Enable=*/false); } void CodeGenFunction::EmitOMPSimdFinal( @@ -2270,6 +2279,8 @@ } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { + ParentLoopDirectiveForScanRegion ScanRegion(*this, S); + OMPFirstScanLoop = true; auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { emitOMPSimdRegion(CGF, S, Action); }; @@ -4191,14 +4202,15 @@ } void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { - // Do not emit code for non-simd directives in simd-only mode. - if (getLangOpts().OpenMPSimd && !OMPParentLoopDirectiveForScan) + if (!OMPParentLoopDirectiveForScan) return; const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; + bool IsInclusive = S.hasClausesOfKind(); SmallVector Shareds; SmallVector Privates; SmallVector LHSs; SmallVector RHSs; + SmallVector ReductionOps; SmallVector CopyOps; SmallVector CopyArrayTemps; SmallVector CopyArrayElems; @@ -4209,13 +4221,109 @@ Privates.append(C->privates().begin(), C->privates().end()); LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); + ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); CopyArrayTemps.append(C->copy_array_temps().begin(), C->copy_array_temps().end()); CopyArrayElems.append(C->copy_array_elems().begin(), C->copy_array_elems().end()); } - bool IsInclusive = S.hasClausesOfKind(); + if (ParentDir.getDirectiveKind() == OMPD_simd || + (getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { + // For simd directive and simd-based directives in simd only mode, use the + // following codegen: + // int x = 0; + // #pragma omp simd reduction(inscan, +: x) + // for (..) { + // + // #pragma omp scan inclusive(x) + // + // } + // is transformed to: + // int x = 0; + // for (..) { + // int x_priv = 0; + // + // x = x_priv + x; + // x_priv = x; + // + // } + // and + // int x = 0; + // #pragma omp simd reduction(inscan, +: x) + // for (..) { + // + // #pragma omp scan exclusive(x) + // + // } + // to + // int x = 0; + // for (..) { + // int x_priv = 0; + // + // int temp = x; + // x = x_priv + x; + // x_priv = temp; + // + // } + llvm::BasicBlock *OMPScanReduce = createBasicBlock("omp.inscan.reduce"); + EmitBranch(IsInclusive + ? OMPScanReduce + : BreakContinueStack.back().ContinueBlock.getBlock()); + EmitBlock(OMPScanDispatch); + { + // New scope for correct construction/destruction of temp variables for + // exclusive scan. + LexicalScope Scope(*this, S.getSourceRange()); + EmitBranch(IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); + EmitBlock(OMPScanReduce); + if (!IsInclusive) { + // Create temp var and copy LHS value to this temp value. + // TMP = LHS; + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + const Expr *TempExpr = CopyArrayTemps[I]; + EmitAutoVarDecl( + *cast(cast(TempExpr)->getDecl())); + LValue DestLVal = EmitLValue(TempExpr); + LValue SrcLVal = EmitLValue(LHSs[I]); + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast(cast(LHSs[I])->getDecl()), + cast(cast(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + CGM.getOpenMPRuntime().emitReduction( + *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, + {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); + for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { + const Expr *PrivateExpr = Privates[I]; + LValue DestLVal; + LValue SrcLVal; + if (IsInclusive) { + DestLVal = EmitLValue(RHSs[I]); + SrcLVal = EmitLValue(LHSs[I]); + } else { + const Expr *TempExpr = CopyArrayTemps[I]; + DestLVal = EmitLValue(RHSs[I]); + SrcLVal = EmitLValue(TempExpr); + } + EmitOMPCopy(PrivateExpr->getType(), DestLVal.getAddress(*this), + SrcLVal.getAddress(*this), + cast(cast(LHSs[I])->getDecl()), + cast(cast(RHSs[I])->getDecl()), + CopyOps[I]); + } + } + EmitBranch(IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); + OMPScanExitBlock = IsInclusive + ? BreakContinueStack.back().ContinueBlock.getBlock() + : OMPScanReduce; + EmitBlock(OMPAfterScanBlock); + return; + } if (!IsInclusive) { EmitBranch(BreakContinueStack.back().ContinueBlock.getBlock()); EmitBlock(OMPScanExitBlock); @@ -6313,6 +6421,7 @@ } if (isOpenMPSimdDirective(D.getDirectiveKind())) { (void)GlobalsScope.Privatize(); + ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); emitOMPSimdRegion(CGF, cast(D), Action); } else { if (const auto *LD = dyn_cast(&D)) { diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -15150,24 +15150,37 @@ S.ActOnFinishFullExpr(CopyOpRes.get(), /*DiscardedValue=*/true); if (!CopyOpRes.isUsable()) continue; - // Build temp array for prefix sum. - auto *Dim = new (S.Context) - OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue); - QualType ArrayTy = - S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal, - /*IndexTypeQuals=*/0, {ELoc, ELoc}); - VarDecl *TempArrayVD = - buildVarDecl(S, ELoc, ArrayTy, D->getName(), - D->hasAttrs() ? &D->getAttrs() : nullptr); - // Add a constructor to the temp decl. - S.ActOnUninitializedDecl(TempArrayVD); - TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc); - TempArrayElem = - S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get()); - auto *Idx = new (S.Context) - OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue); - TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(), - ELoc, Idx, ELoc); + // For simd directive and simd-based directives in simd mode no need to + // construct temp array, need just a single temp element. + if (Stack->getCurrentDirective() == OMPD_simd || + (S.getLangOpts().OpenMPSimd && + isOpenMPSimdDirective(Stack->getCurrentDirective()))) { + VarDecl *TempArrayVD = + buildVarDecl(S, ELoc, PrivateTy, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr); + // Add a constructor to the temp decl. + S.ActOnUninitializedDecl(TempArrayVD); + TempArrayRes = buildDeclRefExpr(S, TempArrayVD, PrivateTy, ELoc); + } else { + // Build temp array for prefix sum. + auto *Dim = new (S.Context) + OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue); + QualType ArrayTy = + S.Context.getVariableArrayType(PrivateTy, Dim, ArrayType::Normal, + /*IndexTypeQuals=*/0, {ELoc, ELoc}); + VarDecl *TempArrayVD = + buildVarDecl(S, ELoc, ArrayTy, D->getName(), + D->hasAttrs() ? &D->getAttrs() : nullptr); + // Add a constructor to the temp decl. + S.ActOnUninitializedDecl(TempArrayVD); + TempArrayRes = buildDeclRefExpr(S, TempArrayVD, ArrayTy, ELoc); + TempArrayElem = + S.DefaultFunctionArrayLvalueConversion(TempArrayRes.get()); + auto *Idx = new (S.Context) + OpaqueValueExpr(ELoc, S.Context.getSizeType(), VK_RValue); + TempArrayElem = S.CreateBuiltinArraySubscriptExpr(TempArrayElem.get(), + ELoc, Idx, ELoc); + } } // OpenMP [2.15.4.6, Restrictions, p.2] diff --git a/clang/test/OpenMP/scan_codegen.cpp b/clang/test/OpenMP/scan_codegen.cpp new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/scan_codegen.cpp @@ -0,0 +1,277 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-version=50 -x c++ -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +void foo(); +void bar(); + +// CHECK-LABEL: baz +void baz() { + int a = 0; + + // CHECK: store i32 0, i32* [[A_ADDR:%.+]], + // CHECK: store i32 0, i32* [[OMP_CNT:%.+]], + // CHECK: br label %[[OMP_HEADER:.+]] + + // CHECK: [[OMP_HEADER]]: + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10 + // CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]] +#pragma omp simd reduction(inscan, + : a) + for (int i = 0; i < 10; ++i) { + // CHECK: [[OMP_BODY]]: + + // i = OMP_CNT*1 + 0; + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1 + // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]] + // CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]], + + // A_PRIV = 0; + // CHECK: store i32 0, i32* [[A_PRIV_ADDR:%.+]], + + // goto DISPATCH; + // CHECK: br label %[[DISPATCH:[^,]+]] + + // INPUT_PHASE: + // foo(); + // goto REDUCE; + // CHECK: [[INPUT_PHASE:.+]]: + // CHECK: call void @{{.*}}foo{{.*}}() + // CHECK: br label %[[REDUCE:[^,]+]] + foo(); + + // DISPATCH: + // goto INPUT_PHASE; + // CHECK: [[DISPATCH]]: + // CHECK: br label %[[INPUT_PHASE]] + + // REDUCE: + // A = A_PRIV + A; + // A_PRIV = A; + // goto SCAN_PHASE; + // CHECK: [[REDUCE]]: + // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: [[A_PRIV:%.+]] = load i32, i32* [[A_PRIV_ADDR]], + // CHECK: [[SUM:%.+]] = add nsw i32 [[A]], [[A_PRIV]] + // CHECK: store i32 [[SUM]], i32* [[A_ADDR]], + // CHECK: [[A:%.+]] = load i32, i32* [[A_ADDR]], + // CHECK: store i32 [[A]], i32* [[A_PRIV_ADDR]], + // CHECK: br label %[[SCAN_PHASE:[^,]+]] +#pragma omp scan inclusive(a) + + // SCAN_PHASE: + // bar(); + // goto CONTINUE; + // CHECK: [[SCAN_PHASE]]: + // CHECK: call void @{{.*}}bar{{.*}}() + // CHECK: br label %[[CONTINUE:[^,]+]] + bar(); + + // CHECK: [[CONTINUE]]: + // CHECK: br label %[[INC_BLOCK:[^,]+]] + + // ++OMP_CNT; + // CHECK: [[INC_BLOCK]]: + // CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1 + // CHECK: store i32 [[INC]], i32* [[OMP_CNT]], + // CHECK: br label %[[OMP_HEADER]] + } + // CHECK: [[OMP_END]]: +} + +struct S { + int a; + S() {} + ~S() {} + S& operator+(const S&); + S& operator=(const S&); +}; + +// CHECK-LABEL: xyz +void xyz() { + S s[2]; + + // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0 + // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2 + // CHECK: br label %[[ARRAY_INIT:.+]] + // CHECK: [[ARRAY_INIT]]: + // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %{{.+}} ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ] + // CHECK: call void [[CONSTR:@.+]](%struct.S* [[S_CUR]]) + // CHECK: [[S_NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[S_CUR]], i{{.+}} 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]] + // CHECK: [[DONE]]: + // CHECK: store i32 0, i32* [[OMP_CNT:%.+]], + // CHECK: br label %[[OMP_HEADER:.+]] + + // CHECK: [[OMP_HEADER]]: + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[CMP:%.+]] = icmp slt i32 [[CNT_VAL]], 10 + // CHECK: br i1 [[CMP]], label %[[OMP_BODY:.+]], label %[[OMP_END:.+]] +#pragma omp simd reduction(inscan, + : s) + for (int i = 0; i < 10; ++i) { + // CHECK: [[OMP_BODY]]: + + // i = OMP_CNT*1 + 0; + // CHECK: [[CNT_VAL:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[MUL:%.+]] = mul nsw i32 [[CNT_VAL]], 1 + // CHECK: [[ADD:%.+]] = add nsw i32 0, [[MUL]] + // CHECK: store i32 [[ADD]], i32* [[I_ADDR:%.+]], + + // S S_PRIV[2]; + // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR:%.+]], i{{.+}} 0, i{{.+}} 0 + // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_BEGIN]], [[S_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT:[^,]+]] + // CHECK: [[ARRAY_INIT]]: + // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_BEGIN]], %[[OMP_BODY]] ], [ [[S_NEXT:%.+]], %[[ARRAY_INIT]] ] + // CHECK: call void [[CONSTR]](%struct.S* [[S_CUR]]) + // CHECK: [[S_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_NEXT]], [[S_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_INIT]] + // CHECK: [[DONE]]: + // CHECK: [[LHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_ADDR]] to %struct.S* + // CHECK: [[RHS_BEGIN:%.+]] = bitcast [2 x %struct.S]* [[S_PRIV_ADDR]] to %struct.S* + + // goto DISPATCH; + // CHECK: br label %[[DISPATCH:[^,]+]] + + // SCAN_PHASE: + // foo(); + // goto CONTINUE; + // CHECK: [[SCAN_PHASE:.+]]: + // CHECK: call void @{{.*}}foo{{.*}}() + // CHECK: br label %[[CONTINUE:[^,]+]] + foo(); + + // DISPATCH: + // goto INPUT_PHASE; + // CHECK: [[DISPATCH]]: + // CHECK: br label %[[INPUT_PHASE:[^,]+]] + + // REDUCE: + // TEMP = S; + // S = S_PRIV + S; + // S_PRIV = TEMP; + // goto SCAN_PHASE; + // CHECK: [[REDUCE:.+]]: + + // S TEMP[2]; + // CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR:%.+]], i32 0, i32 0 + // CHECK: [[TEMP_ARR_END:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2 + // CHECK: br label %[[BODY:[^,]+]] + // CHECK: [[BODY]]: + // CHECK: [[CUR:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %[[REDUCE]] ], [ [[NEXT:%.+]], %[[BODY]] ] + // CHECK: call void [[CONSTR]](%struct.S* [[CUR]]) + // CHECK: [[NEXT]] = getelementptr inbounds %struct.S, %struct.S* [[CUR]], i64 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[NEXT]], [[TEMP_ARR_END]] + // CHECK: br i1 [[IS_DONE]], label %[[EXIT:[^,]+]], label %[[BODY]] + // CHECK: [[EXIT]]: + + // TEMP = S; + // CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR]], i32 0, i32 0 + // CHECK: [[TEMP_ARR_END:%.+]] = getelementptr %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2 + // CHECK: [[IS_EMPTY:%.+]] = icmp eq %struct.S* [[TEMP_ARR_BEG]], [[TEMP_ARR_END]] + // CHECK: br i1 [[IS_EMPTY]], label %[[EXIT:[^,]+]], label %[[BODY:[^,]+]] + // CHECK: [[BODY]]: + // CHECK: [[CUR_SRC:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %{{.+}} ], [ [[SRC_NEXT:%.+]], %[[BODY]] ] + // CHECK: [[CUR_DEST:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %{{.+}} ], [ [[DEST_NEXT:%.+]], %[[BODY]] ] + // CHECK: call {{.*}}%struct.S* [[S_COPY:@.+]](%struct.S* [[CUR_DEST]], %struct.S* {{.*}}[[CUR_SRC]]) + // CHECK: [[DEST_NEXT:%.+]] = getelementptr %struct.S, %struct.S* [[CUR_DEST]], i32 1 + // CHECK: [[SRC_NEXT:%.+]] = getelementptr %struct.S, %struct.S* [[CUR_SRC]], i32 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[TEMP_ARR_END]] + // CHECK: br i1 [[IS_DONE]], label %[[EXIT]], label %[[BODY]] + // CHECK: [[EXIT]]: + + // S = S_PRIV + S; + // CHECK: [[LHS_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[LHS_BEGIN]], i{{.+}} 2 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[LHS_BEGIN]], [[LHS_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY:[^,]+]] + // CHECK: [[ARRAY_REDUCE_COPY]]: + // CHECK: [[SRC_CUR:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[EXIT]] ], [ [[SRC_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ] + // CHECK: [[DEST_CUR:%.+]] = phi %struct.S* [ [[LHS_BEGIN]], %[[EXIT]] ], [ [[DEST_NEXT:%.+]], %[[ARRAY_REDUCE_COPY]] ] + // CHECK: [[SUM:%.+]] = call {{.*}}%struct.S* @{{.+}}(%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SRC_CUR]]) + // CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[DEST_CUR]], %struct.S* {{.*}}[[SUM]]) + // CHECK: [[DEST_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[DEST_CUR]], i{{.+}} 1 + // CHECK: [[SRC_NEXT]] = getelementptr {{.*}}%struct.S, %struct.S* [[SRC_CUR]], i{{.+}} 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[LHS_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_REDUCE_COPY]] + // CHECK: [[DONE]]: + + // S_PRIV = TEMP; + // CHECK: [[TEMP_ARR_BEG:%.+]] = bitcast [2 x %struct.S]* [[TEMP_ARR]] to %struct.S* + // CHECK: [[RHS_END:%.+]] = getelementptr %struct.S, %struct.S* [[RHS_BEGIN]], i64 2 + // CHECK: [[IS_EMPTY:%.+]] = icmp eq %struct.S* [[RHS_BEGIN]], [[RHS_END]] + // CHECK: br i1 [[IS_EMPTY]], label %[[EXIT:[^,]+]], label %[[BODY:[^,]+]] + // CHECK: [[BODY]]: + // CHECK: [[CUR_SRC:%.+]] = phi %struct.S* [ [[TEMP_ARR_BEG]], %[[DONE]] ], [ [[SRC_NEXT:%.+]], %[[BODY]] ] + // CHECK: [[CUR_DEST:%.+]] = phi %struct.S* [ [[RHS_BEGIN]], %[[DONE]] ], [ [[DEST_NEXT:%.+]], %[[BODY]] ] + // CHECK: call {{.*}}%struct.S* [[S_COPY]](%struct.S* [[CUR_DEST]], %struct.S* {{.*}}[[CUR_SRC]]) + // CHECK: [[DEST_NEXT]] = getelementptr %struct.S, %struct.S* [[CUR_DEST]], i32 1 + // CHECK: [[SRC_NEXT]] = getelementptr %struct.S, %struct.S* [[CUR_SRC]], i32 1 + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[DEST_NEXT]], [[RHS_END]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:[^,]+]], label %[[BODY]] + // CHECK: [[DONE]]: + + // TEMP.~S() + // CHECK: [[TEMP_ARR_BEG:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TEMP_ARR]], i32 0, i32 0 + // CHECK: [[TEMP_ARR_END:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[TEMP_ARR_BEG]], i64 2 + // CHECK: br label %[[BODY:[^,]+]] + // CHECK: [[BODY]]: + // CHECK: [[CUR:%.+]] = phi %struct.S* [ [[TEMP_ARR_END]], %[[DONE]] ], [ [[PREV:%.+]], %[[BODY]] ] + // CHECK: [[PREV]] = getelementptr inbounds %struct.S, %struct.S* [[CUR]], i64 -1 + // CHECK: call void [[DESTR:@.+]](%struct.S* [[PREV]]) + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[PREV]], [[TEMP_ARR_BEG]] + // CHECK: br i1 [[IS_DONE]], label %[[EXIT:[^,]+]], label %[[BODY]] + // CHECK: [[EXIT]]: + + // goto SCAN_PHASE; + // CHECK: br label %[[SCAN_PHASE]] +#pragma omp scan exclusive(s) + + // INPUT_PHASE: + // bar(); + // goto REDUCE; + // CHECK: [[INPUT_PHASE]]: + // CHECK: call void @{{.*}}bar{{.*}}() + // CHECK: br label %[[REDUCE]] + bar(); + + // CHECK: [[CONTINUE]]: + + // S_PRIV[2].~S(); + // CHECK: [[S_BEGIN:%.+]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_PRIV_ADDR]], i{{.+}} 0, i{{.+}} 0 + // CHECK: [[S_END:%.+]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_BEGIN]], i{{.+}} 2 + // CHECK: br label %[[ARRAY_DESTR:[^,]+]] + // CHECK: [[ARRAY_DESTR]]: + // CHECK: [[S_CUR:%.+]] = phi %struct.S* [ [[S_END]], %[[CONTINUE]] ], [ [[S_PREV:%.+]], %[[ARRAY_DESTR]] ] + // CHECK: [[S_PREV]] = getelementptr {{.*}}%struct.S, %struct.S* [[S_CUR]], i{{.+}} -1 + // CHECK: call void [[DESTR]](%struct.S* [[S_PREV]]) + // CHECK: [[IS_DONE:%.+]] = icmp eq %struct.S* [[S_PREV]], [[S_BEGIN]] + // CHECK: br i1 [[IS_DONE]], label %[[DONE:.+]], label %[[ARRAY_DESTR]] + // CHECK: [[DONE]]: + // CHECK: br label %[[INC_BLOCK:[^,]+]] + + // ++OMP_CNT; + // CHECK: [[INC_BLOCK]]: + // CHECK: [[CNT:%.+]] = load i32, i32* [[OMP_CNT]], + // CHECK: [[INC:%.+]] = add nsw i32 [[CNT]], 1 + // CHECK: store i32 [[INC]], i32* [[OMP_CNT]], + // CHECK: br label %[[OMP_HEADER]] + } + // CHECK: [[OMP_END]]: +} + +// CHECK-NOT: !{!"llvm.loop.parallel_accesses" + +#endif // HEADER