Index: cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
+++ cfe/trunk/lib/CodeGen/CGStmtOpenMP.cpp
@@ -680,6 +680,38 @@
   }
 }
 
+static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
+                        const Expr *Cond, llvm::BasicBlock *TrueBlock,
+                        llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
+  CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
+  EmitPrivateLoopCounters(CGF, PreCondScope, S.counters());
+  const VarDecl *IVDecl =
+      cast<VarDecl>(cast<DeclRefExpr>(S.getIterationVariable())->getDecl());
+  bool IsRegistered = PreCondScope.addPrivate(IVDecl, [&]() -> llvm::Value *{
+    // Emit var without initialization.
+    auto VarEmission = CGF.EmitAutoVarAlloca(*IVDecl);
+    CGF.EmitAutoVarCleanups(VarEmission);
+    return VarEmission.getAllocatedAddress();
+  });
+  assert(IsRegistered && "counter already registered as private");
+  // Silence the warning about unused variable.
+  (void)IsRegistered;
+  (void)PreCondScope.Privatize();
+  // Initialize internal counter to 0 to calculate initial values of real
+  // counters.
+  LValue IV = CGF.EmitLValue(S.getIterationVariable());
+  CGF.EmitStoreOfScalar(
+      llvm::ConstantInt::getNullValue(
+          IV.getAddress()->getType()->getPointerElementType()),
+      CGF.EmitLValue(S.getIterationVariable()), /*isInit=*/true);
+  // Get initial values of real counters.
+  for (auto I : S.updates()) {
+    CGF.EmitIgnoredExpr(I);
+  }
+  // Check that the loop is executed at least once.
+  CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
+}
+
 static void EmitPrivateLinearVars(CodeGenFunction &CGF,
                                   const OMPExecutableDirective &D,
                                   CodeGenFunction::OMPPrivateScope &PrivateScope) {
@@ -704,7 +736,7 @@
     // Pragma 'simd' code depends on presence of 'lastprivate'.
     // If present, we have to separate last iteration of the loop:
     //
-    // if (LastIteration != 0) {
+    // if (PreCond) {
     //   for (IV in 0..LastIteration-1) BODY;
     //   BODY with updates of lastprivate vars;
    //   <Final counter/linear vars updates>;
@@ -712,10 +744,28 @@
    // }
    //
    // otherwise (when there's no lastprivate):
    //
+    // if (PreCond) {
    //   for (IV in 0..LastIteration) BODY;
    //   <Final counter/linear vars updates>;
+    // }
    //
+    // Emit: if (PreCond) - begin.
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return;
+    } else {
+      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
+      auto *ThenBlock = CGF.createBasicBlock("simd.if.then");
+      ContBlock = CGF.createBasicBlock("simd.if.end");
+      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
+      CGF.EmitBlock(ThenBlock);
+      Cnt.beginRegion(CGF.Builder);
+    }
    // Walk clauses and process safelen/lastprivate.
    bool SeparateIter = false;
    CGF.LoopStack.setParallel();
@@ -780,51 +830,28 @@
      }
    }
 
-    if (SeparateIter) {
-      // Emit: if (LastIteration > 0) - begin.
-      RegionCounter Cnt = CGF.getPGORegionCounter(&S);
-      auto ThenBlock = CGF.createBasicBlock("simd.if.then");
-      auto ContBlock = CGF.createBasicBlock("simd.if.end");
-      CGF.EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock,
-                               Cnt.getCount());
-      CGF.EmitBlock(ThenBlock);
-      Cnt.beginRegion(CGF.Builder);
-      // Emit 'then' code.
-      {
-        OMPPrivateScope LoopScope(CGF);
-        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
-        EmitPrivateLinearVars(CGF, S, LoopScope);
-        CGF.EmitOMPPrivateClause(S, LoopScope);
-        (void)LoopScope.Privatize();
-        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                             S.getCond(/*SeparateIter=*/true), S.getInc(),
-                             [&S](CodeGenFunction &CGF) {
-                               CGF.EmitOMPLoopBody(S);
-                               CGF.EmitStopPoint(&S);
-                             },
-                             [](CodeGenFunction &) {});
-        CGF.EmitOMPLoopBody(S, /* SeparateIter */ true);
+    {
+      OMPPrivateScope LoopScope(CGF);
+      EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
+      EmitPrivateLinearVars(CGF, S, LoopScope);
+      CGF.EmitOMPPrivateClause(S, LoopScope);
+      (void)LoopScope.Privatize();
+      CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
+                           S.getCond(SeparateIter), S.getInc(),
+                           [&S](CodeGenFunction &CGF) {
+                             CGF.EmitOMPLoopBody(S);
+                             CGF.EmitStopPoint(&S);
+                           },
+                           [](CodeGenFunction &) {});
+      if (SeparateIter) {
+        CGF.EmitOMPLoopBody(S, /*SeparateIter=*/true);
       }
-      CGF.EmitOMPSimdFinal(S);
-      // Emit: if (LastIteration != 0) - end.
+    }
+    CGF.EmitOMPSimdFinal(S);
+    // Emit: if (PreCond) - end.
+    if (ContBlock) {
       CGF.EmitBranch(ContBlock);
       CGF.EmitBlock(ContBlock, true);
-    } else {
-      {
-        OMPPrivateScope LoopScope(CGF);
-        EmitPrivateLoopCounters(CGF, LoopScope, S.counters());
-        EmitPrivateLinearVars(CGF, S, LoopScope);
-        CGF.EmitOMPPrivateClause(S, LoopScope);
-        (void)LoopScope.Privatize();
-        CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(),
-                             S.getCond(/*SeparateIter=*/false), S.getInc(),
-                             [&S](CodeGenFunction &CGF) {
-                               CGF.EmitOMPLoopBody(S);
-                               CGF.EmitStopPoint(&S);
-                             },
-                             [](CodeGenFunction &) {});
-      }
-      CGF.EmitOMPSimdFinal(S);
     }
   };
   CGM.getOpenMPRuntime().emitInlinedDirective(*this, CodeGen);
@@ -1010,12 +1037,22 @@
   // Check pre-condition.
   {
     // Skip the entire loop if we don't meet the precondition.
-    RegionCounter Cnt = getPGORegionCounter(&S);
-    auto ThenBlock = createBasicBlock("omp.precond.then");
-    auto ContBlock = createBasicBlock("omp.precond.end");
-    EmitBranchOnBoolExpr(S.getPreCond(), ThenBlock, ContBlock, Cnt.getCount());
-    EmitBlock(ThenBlock);
-    Cnt.beginRegion(Builder);
+    // If the condition constant folds and can be elided, avoid emitting the
+    // whole loop.
+    bool CondConstant;
+    llvm::BasicBlock *ContBlock = nullptr;
+    if (ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
+      if (!CondConstant)
+        return false;
+    } else {
+      RegionCounter Cnt = getPGORegionCounter(&S);
+      auto *ThenBlock = createBasicBlock("omp.precond.then");
+      ContBlock = createBasicBlock("omp.precond.end");
+      emitPreCond(*this, S, S.getPreCond(), ThenBlock, ContBlock,
+                  Cnt.getCount());
+      EmitBlock(ThenBlock);
+      Cnt.beginRegion(Builder);
+    }
     // Emit 'then' code.
     {
       // Emit helper vars inits.
@@ -1090,8 +1127,10 @@
           S, Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getLocStart())));
     }
     // We're now done with the loop, so jump to the continuation block.
-    EmitBranch(ContBlock);
-    EmitBlock(ContBlock, true);
+    if (ContBlock) {
+      EmitBranch(ContBlock);
+      EmitBlock(ContBlock, true);
+    }
   }
   return HasLastprivateClause;
 }
Index: cfe/trunk/lib/Sema/SemaOpenMP.cpp
===================================================================
--- cfe/trunk/lib/Sema/SemaOpenMP.cpp
+++ cfe/trunk/lib/Sema/SemaOpenMP.cpp
@@ -1977,6 +1977,8 @@
   bool ShouldSubtractStep() const { return SubtractStep; }
   /// \brief Build the expression to calculate the number of iterations.
   Expr *BuildNumIterations(Scope *S, const bool LimitedType) const;
+  /// \brief Build the precondition expression for the loops.
+  Expr *BuildPreCond(Scope *S, Expr *Cond) const;
   /// \brief Build reference expression to the counter to be used for codegen.
   Expr *BuildCounterVar() const;
   /// \brief Build initialization of the counter to be used for codegen.
@@ -2380,6 +2382,19 @@
   return Diff.get();
 }
 
+Expr *OpenMPIterationSpaceChecker::BuildPreCond(Scope *S, Expr *Cond) const {
+  // Try to build LB <op> UB, where <op> is <, >, <=, or >=.
+  bool Suppress = SemaRef.getDiagnostics().getSuppressAllDiagnostics();
+  SemaRef.getDiagnostics().setSuppressAllDiagnostics(/*Val=*/true);
+  auto CondExpr = SemaRef.BuildBinOp(
+      S, DefaultLoc, TestIsLessOp ? (TestIsStrictOp ? BO_LT : BO_LE)
+                                  : (TestIsStrictOp ? BO_GT : BO_GE),
+      LB, UB);
+  SemaRef.getDiagnostics().setSuppressAllDiagnostics(Suppress);
+  // Otherwise use the original loop condition and evaluate it at runtime.
+  return CondExpr.isUsable() ? CondExpr.get() : Cond;
+}
+
 /// \brief Build reference expression to the counter to be used for codegen.
 Expr *OpenMPIterationSpaceChecker::BuildCounterVar() const {
   return DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(),
@@ -2395,6 +2410,8 @@
 
 /// \brief Iteration space of a single for loop.
 struct LoopIterationSpace {
+  /// \brief Condition of the loop.
+  Expr *PreCond;
   /// \brief This expression calculates the number of iterations in the loop.
   /// It is always possible to calculate it before starting the loop.
   Expr *NumIterations;
@@ -2535,6 +2552,7 @@
     return HasErrors;
 
   // Build the loop's iteration space representation.
+  ResultIterSpace.PreCond = ISC.BuildPreCond(DSA.getCurScope(), For->getCond());
   ResultIterSpace.NumIterations = ISC.BuildNumIterations(
       DSA.getCurScope(), /* LimitedType */ isOpenMPWorksharingDirective(DKind));
   ResultIterSpace.CounterVar = ISC.BuildCounterVar();
@@ -2545,7 +2563,8 @@
   ResultIterSpace.IncSrcRange = ISC.GetIncrementSrcRange();
   ResultIterSpace.Subtract = ISC.ShouldSubtractStep();
 
-  HasErrors |= (ResultIterSpace.NumIterations == nullptr ||
+  HasErrors |= (ResultIterSpace.PreCond == nullptr ||
+                ResultIterSpace.NumIterations == nullptr ||
                 ResultIterSpace.CounterVar == nullptr ||
                 ResultIterSpace.CounterInit == nullptr ||
                 ResultIterSpace.CounterStep == nullptr);
@@ -2690,6 +2709,9 @@
 
   // Last iteration number is (I1 * I2 * ... In) - 1, where I1, I2 ... In are
   // the iteration counts of the collapsed for loops.
+  // Precondition tests if there is at least one iteration (all conditions are
+  // true).
+  auto PreCond = ExprResult(IterSpaces[0].PreCond);
   auto N0 = IterSpaces[0].NumIterations;
   ExprResult LastIteration32 = WidenIterationCount(32 /* Bits */, N0, SemaRef);
   ExprResult LastIteration64 = WidenIterationCount(64 /* Bits */, N0, SemaRef);
@@ -2702,6 +2724,10 @@
 
   Scope *CurScope = DSA.getCurScope();
   for (unsigned Cnt = 1; Cnt < NestedLoopCount; ++Cnt) {
+    if (PreCond.isUsable()) {
+      PreCond = SemaRef.BuildBinOp(CurScope, SourceLocation(), BO_LAnd,
+                                   PreCond.get(), IterSpaces[Cnt].PreCond);
+    }
     auto N = IterSpaces[Cnt].NumIterations;
     AllCountsNeedLessThan32Bits &= C.getTypeSize(N->getType()) < 32;
     if (LastIteration32.isUsable())
@@ -2763,11 +2789,6 @@
 
   SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin();
 
-  // Precondition tests if there is at least one iteration (LastIteration > 0).
-  ExprResult PreCond = SemaRef.BuildBinOp(
-      CurScope, InitLoc, BO_GT, LastIteration.get(),
-      SemaRef.ActOnIntegerConstant(SourceLocation(), 0).get());
-
   QualType VType = LastIteration.get()->getType();
   // Build variables passed into runtime, necessary for worksharing directives.
   ExprResult LB, UB, IL, ST, EUB;
Index: cfe/trunk/test/OpenMP/for_codegen.cpp
===================================================================
--- cfe/trunk/test/OpenMP/for_codegen.cpp
+++ cfe/trunk/test/OpenMP/for_codegen.cpp
@@ -315,6 +315,31 @@
 // CHECK: ret void
 }
 
+// CHECK-LABEL: test_precond
+void test_precond() {
+  // CHECK: [[A_ADDR:%.+]] = alloca i8,
+  // CHECK: [[I_ADDR:%.+]] = alloca i8,
+  char a = 0;
+  // CHECK: store i32 0, i32* [[IV_ADDR:%.+]],
+  // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]],
+  // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32
+  // CHECK: [[IV:%.+]] = load i32, i32* [[IV_ADDR]],
+  // CHECK: [[MUL:%.+]] = mul nsw i32 [[IV]], 1
+  // CHECK: [[ADD:%.+]] = add nsw i32 [[CONV]], [[MUL]]
+  // CHECK: [[CONV:%.+]] = trunc i32 [[ADD]] to i8
+  // CHECK: store i8 [[CONV]], i8* [[I_ADDR]],
+  // CHECK: [[A:%.+]] = load i8, i8* [[A_ADDR]],
+  // CHECK: [[CONV:%.+]] = sext i8 [[A]] to i32
+  // CHECK: [[CMP:%.+]] = icmp slt i32 [[CONV]], 10
+  // CHECK: br i1 [[CMP]], label %[[PRECOND_THEN:[^,]+]], label %[[PRECOND_END:[^,]+]]
+  // CHECK: [[PRECOND_THEN]]
+  // CHECK: call void @__kmpc_for_static_init_4
+#pragma omp for
+  for(char i = a; i < 10; ++i);
+  // CHECK: call void @__kmpc_for_static_fini
+  // CHECK: [[PRECOND_END]]
+}
+
 // TERM_DEBUG-LABEL: foo
 int foo() {return 0;};
 
Index: cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp
===================================================================
--- cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp
+++ cfe/trunk/test/OpenMP/for_firstprivate_codegen.cpp
@@ -40,7 +40,7 @@
   S<T> var(3);
 #pragma omp parallel
 #pragma omp for firstprivate(t_var, vec, s_arr, var)
-  for (int i = 0; i < 0; ++i) {
+  for (int i = 0; i < 2; ++i) {
     vec[i] = t_var;
     s_arr[i] = var;
   }
@@ -146,7 +146,7 @@
   return 0;
 #else
 #pragma omp for firstprivate(t_var, vec, s_arr, var)
-  for (int i = 0; i < 0; ++i) {
+  for (int i = 0; i < 2; ++i) {
     vec[i] = t_var;
     s_arr[i] = var;
   }
Index: cfe/trunk/test/OpenMP/for_lastprivate_codegen.cpp
===================================================================
--- cfe/trunk/test/OpenMP/for_lastprivate_codegen.cpp
+++ cfe/trunk/test/OpenMP/for_lastprivate_codegen.cpp
@@ -67,12 +67,14 @@
   // LAMBDA: alloca i{{[0-9]+}},
   // LAMBDA: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
   // LAMBDA: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
-  // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+  // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
+  // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+  // LAMBDA: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
   // LAMBDA: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
   // LAMBDA: [[G_PRIVATE_ADDR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 0
   // LAMBDA: store i{{[0-9]+}}* [[G_PRIVATE_ADDR]], i{{[0-9]+}}** [[G_PRIVATE_ADDR_REF]]
   // LAMBDA: call void [[INNER_LAMBDA:@.+]](%{{.+}}* [[ARG]])
-  // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+  // LAMBDA: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
   g = 1;
   // Check for final copying of private values back to original vars.
   // LAMBDA: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -86,8 +88,6 @@
   // LAMBDA: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]],
   // LAMBDA: br label %[[LAST_DONE]]
   // LAMBDA: [[LAST_DONE]]
-  // LAMBDA: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
-  // LAMBDA: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
   // LAMBDA: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
   [&]() {
     // LAMBDA: define {{.+}} void [[INNER_LAMBDA]](%{{.+}}* [[ARG_PTR:%.+]])
@@ -119,13 +119,15 @@
   // BLOCKS: alloca i{{[0-9]+}},
   // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = alloca i{{[0-9]+}},
   // BLOCKS: store %{{.+}}* [[ARG]], %{{.+}}** [[ARG_REF:%.+]],
-  // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+  // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
+  // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+  // BLOCKS: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
   // BLOCKS: store volatile i{{[0-9]+}} 1, i{{[0-9]+}}* [[G_PRIVATE_ADDR]],
   // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
   // BLOCKS: i{{[0-9]+}}* [[G_PRIVATE_ADDR]]
   // BLOCKS-NOT: [[G]]{{[[^:word:]]}}
   // BLOCKS: call void {{%.+}}(i8
-  // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+  // BLOCKS: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
   g = 1;
   // Check for final copying of private values back to original vars.
   // BLOCKS: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -139,8 +141,6 @@
   // BLOCKS: store volatile i{{[0-9]+}} [[G_VAL]], i{{[0-9]+}}* [[G]],
   // BLOCKS: br label %[[LAST_DONE]]
   // BLOCKS: [[LAST_DONE]]
-  // BLOCKS: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** %{{.+}}
-  // BLOCKS: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
   // BLOCKS: call i32 @__kmpc_cancel_barrier(%{{.+}}* @{{.+}}, i{{[0-9]+}} [[GTID]])
   g = 1;
   ^{
@@ -261,9 +261,11 @@
 
 // Check for default initialization.
 // CHECK-NOT: [[X_PRIV]]
-// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 %{{.+}}, i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call {{.+}} @__kmpc_for_static_init_4(%{{.+}}* @{{.+}}, i32 [[GTID]], i32 34, i32* [[IS_LAST_ADDR:%.+]], i32* %{{.+}}, i32* %{{.+}}, i32* %{{.+}}, i32 1, i32 1)
 //
-// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 %{{.+}})
+// CHECK: call void @__kmpc_for_static_fini(%{{.+}}* @{{.+}}, i32 [[GTID]])
 
 // Check for final copying of private values back to original vars.
 // CHECK: [[IS_LAST_VAL:%.+]] = load i32, i32* [[IS_LAST_ADDR]],
@@ -278,8 +280,7 @@
 // CHECK-NEXT: br label %[[LAST_DONE]]
 // CHECK: [[LAST_DONE]]
 
-// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[GTID_ADDR_REF]]
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[GTID_REF]]
+// CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 // CHECK: call i32 @__kmpc_cancel_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
 
 // CHECK: ret void
Index: cfe/trunk/test/OpenMP/parallel_for_codegen.cpp
===================================================================
--- cfe/trunk/test/OpenMP/parallel_for_codegen.cpp
+++ cfe/trunk/test/OpenMP/parallel_for_codegen.cpp
@@ -47,11 +47,7 @@
 // CHECK-NEXT: br label %{{.+}}
 }
 // CHECK: [[LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
@@ -95,11 +91,7 @@
 // CHECK-NEXT: br label %{{.+}}
 }
 // CHECK: [[LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
@@ -162,11 +154,7 @@
 // CHECK-NEXT: store i32 [[ADD_UB]], i32* [[OMP_UB]]
 
 // CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call void @__kmpc_for_static_fini([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]])
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
@@ -181,8 +169,6 @@
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 35, i64 0, i64 16908287, i64 1, i64 1)
 //
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
@@ -214,8 +200,6 @@
 }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
@@ -230,8 +214,6 @@
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call void @__kmpc_dispatch_init_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 36, i64 0, i64 16908287, i64 1, i64 7)
 //
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_8u([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i64* [[OMP_LB:%[^,]+]], i64* [[OMP_UB:%[^,]+]], i64* [[OMP_ST:%[^,]+]])
 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
@@ -263,8 +245,6 @@
 }
 // CHECK: [[LOOP1_END]]
 // CHECK: [[O_LOOP1_END]]
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call {{.+}} @__kmpc_cancel_barrier([[IDENT_T_TY]]* [[DEFAULT_LOC_BARRIER:[@%].+]], i32 [[GTID]])
 // CHECK: ret void
 }
@@ -332,8 +312,6 @@
 // CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: call void @__kmpc_dispatch_init_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32 37, i32 0, i32 199, i32 1, i32 1)
 //
-// CHECK: [[GTID_REF:%.+]] = load i32*, i32** [[GTID_REF_ADDR]],
-// CHECK: [[GTID:%.+]] = load i32, i32* [[GTID_REF]],
 // CHECK: [[HASWORK:%.+]] = call i32 @__kmpc_dispatch_next_4([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], i32* [[OMP_ISLAST:%[^,]+]], i32* [[OMP_LB:%[^,]+]], i32* [[OMP_UB:%[^,]+]], i32* [[OMP_ST:%[^,]+]])
 // CHECK-NEXT: [[O_CMP:%.+]] = icmp ne i32 [[HASWORK]], 0
 // CHECK-NEXT: br i1 [[O_CMP]], label %[[O_LOOP1_BODY:[^,]+]], label %[[O_LOOP1_END:[^,]+]]
Index: cfe/trunk/test/OpenMP/simd_codegen.cpp
===================================================================
--- cfe/trunk/test/OpenMP/simd_codegen.cpp
+++ cfe/trunk/test/OpenMP/simd_codegen.cpp
@@ -176,28 +176,10 @@
 }
 // CHECK: [[SIMPLE_LOOP5_END]]
 
+// CHECK-NOT: mul i32 %{{.+}}, 10
 #pragma omp simd
-// FIXME: I think we would get wrong result using 'unsigned' in the loop below.
-// So we'll need to add zero trip test for 'unsigned' counters.
-//
-// CHECK: store i32 0, i32* [[OMP_IV6:%[^,]+]]
-
-// CHECK: [[IV6:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID:[0-9]+]]
-// CHECK-NEXT: [[CMP6:%.+]] = icmp slt i32 [[IV6]], -8
-// CHECK-NEXT: br i1 [[CMP6]], label %[[SIMPLE_LOOP6_BODY:.+]], label %[[SIMPLE_LOOP6_END:[^,]+]]
-  for (int i=100; i<10; i+=10) {
-// CHECK: [[SIMPLE_LOOP6_BODY]]
-// Start of body: calculate i from IV:
-// CHECK: [[IV6_0:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-// CHECK-NEXT: [[LC_IT_1:%.+]] = mul nsw i32 [[IV6_0]], 10
-// CHECK-NEXT: [[LC_IT_2:%.+]] = add nsw i32 100, [[LC_IT_1]]
-// CHECK-NEXT: store i32 [[LC_IT_2]], i32* {{.+}}, !llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-
-// CHECK: [[IV6_2:%.+]] = load i32, i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
-// CHECK-NEXT: [[ADD6_2:%.+]] = add nsw i32 [[IV6_2]], 1
-// CHECK-NEXT: store i32 [[ADD6_2]], i32* [[OMP_IV6]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP6_ID]]
+  for (unsigned i=100; i<10; i+=10) {
 }
-// CHECK: [[SIMPLE_LOOP6_END]]
 
 int A;
 #pragma omp simd lastprivate(A)
@@ -205,8 +187,6 @@
 // Test checks that one iteration is separated in presence of lastprivate.
 //
 // CHECK: store i64 0, i64* [[OMP_IV7:%[^,]+]]
-// CHECK: br i1 true, label %[[SIMPLE_IF7_THEN:.+]], label %[[SIMPLE_IF7_END:[^,]+]]
-// CHECK: [[SIMPLE_IF7_THEN]]
 // CHECK: br label %[[SIMD_LOOP7_COND:[^,]+]]
 // CHECK: [[SIMD_LOOP7_COND]]
 // CHECK-NEXT: [[IV7:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.mem.parallel_loop_access ![[SIMPLE_LOOP7_ID:[0-9]+]]
@@ -233,9 +213,6 @@
 // CHECK: [[LOAD_I:%.+]] = load i64, i64* [[ADDR_I]]
 // CHECK-NEXT: [[CONV_I:%.+]] = trunc i64 [[LOAD_I]] to i32
 //
-// CHECK: br label %[[SIMPLE_IF7_END]]
-// CHECK: [[SIMPLE_IF7_END]]
-//
 // CHECK: ret void
 }
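
Note on the simd_codegen.cpp change above: the deleted FIXME worried that an
'unsigned' loop counter gives a wrong trip count for a loop that should run
zero times. The standalone program below (illustrative only, not part of the
patch; compile with -fopenmp) shows the behavior the new precondition
guarantees: the guard 'i < 10' is false on entry, so the emitted loop is
skipped entirely and the assert holds.

#include <cassert>

int main() {
  int executed = 0;
#pragma omp simd
  for (unsigned i = 100; i < 10; i += 10) // zero-trip: 100 < 10 is false
    ++executed;
  // With the precondition guarding the emitted loop, the body never runs.
  assert(executed == 0 && "zero-trip loop must not execute");
  return 0;
}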
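For reference, the precondition Sema builds is the loop's own 'LB <op> UB'
comparison (BuildPreCond), conjoined with '&&' across a collapsed nest; CodeGen
then either branches on it or, when it constant-folds, drops the branch or the
whole loop. A minimal sketch of that selection logic (the helper below is
hypothetical, not the Clang API; the flag names follow the patch):

#include <iostream>

// Mirrors the BO_LT/BO_LE/BO_GT/BO_GE choice made in BuildPreCond.
static bool preCond(long LB, long UB, bool TestIsLessOp, bool TestIsStrictOp) {
  if (TestIsLessOp)
    return TestIsStrictOp ? LB < UB : LB <= UB;
  return TestIsStrictOp ? LB > UB : LB >= UB;
}

int main() {
  // 'for (unsigned i = 100; i < 10; i += 10)' from simd_codegen.cpp:
  std::cout << preCond(100, 10, true, true) << '\n'; // 0 -> loop is skipped
  // Collapsed nest: every level needs at least one iteration.
  std::cout << (preCond(0, 2, true, true) && preCond(5, 5, true, false))
            << '\n'; // 1 -> loop is emitted
  return 0;
}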