diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2234,15 +2234,18 @@
   llvm::FunctionCallee ExitCallee;
   ArrayRef<llvm::Value *> ExitArgs;
   bool Conditional;
+  bool WithMemBarrier = false;
   llvm::BasicBlock *ContBlock = nullptr;

 public:
   CommonActionTy(llvm::FunctionCallee EnterCallee,
                  ArrayRef<llvm::Value *> EnterArgs,
                  llvm::FunctionCallee ExitCallee,
-                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
+                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false,
+                 bool WithMemBarrier = false)
       : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
-        ExitArgs(ExitArgs), Conditional(Conditional) {}
+        ExitArgs(ExitArgs), Conditional(Conditional),
+        WithMemBarrier(WithMemBarrier) {}
   void Enter(CodeGenFunction &CGF) override {
     llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
     if (Conditional) {
@@ -2253,6 +2256,8 @@
       CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
       CGF.EmitBlock(ThenBlock);
     }
+    if (WithMemBarrier)
+      CGF.Builder.CreateFence(llvm::AtomicOrdering::Acquire);
   }
   void Done(CodeGenFunction &CGF) {
     // Emit the rest of blocks/branches
@@ -2260,6 +2265,8 @@
     CGF.EmitBlock(ContBlock, true);
   }
   void Exit(CodeGenFunction &CGF) override {
+    if (WithMemBarrier)
+      CGF.Builder.CreateFence(llvm::AtomicOrdering::Release);
     CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
   }
 };
@@ -2290,7 +2297,7 @@
         EnterArgs,
         llvm::OpenMPIRBuilder::getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_end_critical),
-        Args);
+        Args, /*Conditional=*/false, /*WithMemBarrier=*/true);
     CriticalOpGen.setAction(Action);
     emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
   }
diff --git a/clang/test/OpenMP/critical_codegen.cpp b/clang/test/OpenMP/critical_codegen.cpp
--- a/clang/test/OpenMP/critical_codegen.cpp
+++ b/clang/test/OpenMP/critical_codegen.cpp
@@ -30,28 +30,35 @@
 // ALL:       [[A_ADDR:%.+]] = alloca i8
   char a;
-// ALL:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
-// ALL-NEXT:  store i8 2, i8* [[A_ADDR]]
-// ALL-NEXT:  call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
+// ALL:       [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
+// ALL-NEXT:  fence acquire
+// ALL-NEXT:  store i8 2, i8* [[A_ADDR]]
+// ALL-NEXT:  fence release
+// ALL-NEXT:  call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[UNNAMED_LOCK]])
 #pragma omp critical
   a = 2;
 // IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
-// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
-// NORMAL-NEXT:  invoke {{.*}}void [[FOO]]()
-// ALL:       call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+// ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+// ALL-NEXT:  fence acquire
+// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
+// NORMAL-NEXT:  invoke {{.*}}void [[FOO]]()
+// ALL:       fence release
+// ALL-NEXT:  call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
 #pragma omp critical(the_name)
   foo();
-// IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
-// ALL:       call {{.*}}void @__kmpc_critical_with_hint([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]], i{{64|32}} 23)
-// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
-// NORMAL-NEXT:  invoke {{.*}}void [[FOO]]()
-// ALL:       call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]])
+// IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+// ALL:       call {{.*}}void @__kmpc_critical_with_hint([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]], i{{64|32}} 23)
+// ALL-NEXT:  fence acquire
+// IRBUILDER-NEXT:  call {{.*}}void [[FOO]]()
+// NORMAL-NEXT:  invoke {{.*}}void [[FOO]]()
+// ALL:       fence release
+// ALL-NEXT:  call {{.*}}void @__kmpc_end_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK1]])
 #pragma omp critical(the_name1) hint(23)
   foo();
-  // IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
+  // IRBUILDER: [[GTID:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT_T_TY]]* [[DEFAULT_LOC:@.+]])
   // ALL:       call {{.*}}void @__kmpc_critical([[IDENT_T_TY]]* [[DEFAULT_LOC]], i32 [[GTID]], [8 x i32]* [[THE_NAME_LOCK]])
+  // ALL-NEXT:  fence acquire
   // ALL:       br label
   // ALL-NOT:   call {{.*}}void @__kmpc_end_critical(
   // ALL:       br label
@@ -80,10 +87,12 @@
   // NORMAL: [[S_REF:%.+]] = load %struct.S*, %struct.S** [[S_ADDR]],
   // NORMAL: store %struct.S* [[S_REF]], %struct.S** [[S_ADDR:%.+]],
   // ALL: call void @__kmpc_critical(
+  // ALL: fence acquire
 #pragma omp critical
   // ALL: [[S_REF:%.+]] = load %struct.S*, %struct.S** [[S_ADDR]],
   // ALL: [[S_A_REF:%.+]] = getelementptr inbounds %struct.S, %struct.S* [[S_REF]], i32 0, i32 0
   ++s.a;
+  // ALL: fence release
   // ALL: call void @__kmpc_end_critical(
 }
@@ -94,15 +103,17 @@
 #pragma omp critical
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
   // TERM_DEBUG:     call void @__kmpc_critical({{.+}}), !dbg [[DBG_LOC_START:![0-9]+]]
+  // TERM_DEBUG:     fence acquire
   // TERM_DEBUG:     invoke void {{.*}}foo{{.*}}()
   // TERM_DEBUG:     unwind label %[[TERM_LPAD:.+]],
   // TERM_DEBUG-NOT: __kmpc_global_thread_num
+  // TERM_DEBUG:     fence release
   // TERM_DEBUG:     call void @__kmpc_end_critical({{.+}}), !dbg [[DBG_LOC_END:![0-9]+]]
   // TERM_DEBUG:     [[TERM_LPAD]]
   // TERM_DEBUG:     call void @__clang_call_terminate
   // TERM_DEBUG:     unreachable
   foo();
 }
-// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-12]],
+// TERM_DEBUG-DAG: [[DBG_LOC_START]] = !DILocation(line: [[@LINE-14]],
 // TERM_DEBUG-DAG: [[DBG_LOC_END]] = !DILocation(line: [[@LINE-3]],
 #endif
diff --git a/clang/test/OpenMP/openmp_win_codegen.cpp b/clang/test/OpenMP/openmp_win_codegen.cpp
--- a/clang/test/OpenMP/openmp_win_codegen.cpp
+++ b/clang/test/OpenMP/openmp_win_codegen.cpp
@@ -54,10 +54,12 @@
 // CHECK: [[CATCHSWITCH:%.+]] = catchswitch within none
 // CHECK: [[CATCHPAD:%.+]] = catchpad within [[CATCHSWITCH]]
 // CHECK: call void @__kmpc_critical(%struct.ident_t* {{.*}}@0, i32 [[GID:%.+]],
+// CHECK-NEXT: fence acquire
 // CHECK: invoke void @{{.+}}bar
 // CHECK: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@0, i32 [[GID]],
 // CHECK: catchret from [[CATCHPAD]] to
 // CHECK: cleanuppad within [[CATCHPAD]] []
+// CHECK-NEXT: fence release
 // CHECK-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.*}}@0, i32 [[GID]],
 // CHECK-NEXT: cleanupret from {{.*}} unwind label %[[CATCHTERM:[^ ]+]]
 // CHECK: cleanuppad within none []
diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp
--- a/clang/test/OpenMP/simd_codegen.cpp
+++ b/clang/test/OpenMP/simd_codegen.cpp
@@ -243,6 +243,7 @@
 // CHECK-NEXT: store i32 [[CONV]], i32* [[A_PRIV]],{{.+}}!llvm.access.group
 // OMP50-NEXT: [[IV:%.+]] = load i64, i64* [[OMP_IV7]],{{.+}}!llvm.access.group
 // OMP50RT: call void @__kmpc_critical(%struct.ident_t* {{.+}}, i32 [[GTID:%.+]], [8 x i32]* [[A_REGION:@.+]]),{{.+}}!llvm.access.group
+// OMP50RT-NEXT: fence acquire
 // OMP50-NEXT: [[LAST_IV_VAL:%.+]] = load i64, i64* [[LAST_IV]],{{.+}}!llvm.access.group
 // OMP50-NEXT: [[CMP:%.+]] = icmp sle i64 [[LAST_IV_VAL]], [[IV]]
 // OMP50-NEXT: br i1 [[CMP]], label %[[LP_THEN:.+]], label %[[LP_DONE:[^,]+]]
@@ -252,6 +253,7 @@
 // OMP50-NEXT: store i32 [[A_VAL]], i32* [[LAST_A]],{{.+}}!llvm.access.group
 // OMP50-NEXT: br label %[[LP_DONE]]
 // OMP50: [[LP_DONE]]:
+// OMP50RT-NEXT: fence release
 // OMP50RT-NEXT: call void @__kmpc_end_critical(%struct.ident_t* {{.+}}, i32 [[GTID]], [8 x i32]* [[A_REGION]]),{{.+}}!llvm.access.group
   A += i;
 // CHECK: [[IV7_2:%.+]] = load i64, i64* [[OMP_IV7]]{{.*}}!llvm.access.group
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -354,12 +354,15 @@
   /// \param HasFinalize indicate if the directive will require finalization
   ///        and has a finalization callback in the stack that
   ///        should be called.
+  /// \param WithMemBarrier true if memory fences are required to keep memory
+  ///        accesses from being moved out of the region by optimizations.
   ///
   /// \return The insertion position in exit block
   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
                                         InsertPointTy FinIP,
                                         Instruction *ExitCall,
-                                        bool HasFinalize = true);
+                                        bool HasFinalize = true,
+                                        bool WithMemBarrier = false);

   /// Common Interface to generate OMP inlined regions
   ///
@@ -374,6 +377,8 @@
   /// \param HasFinalize indicate if the directive will require finalization
   ///        and has a finalization callback in the stack that
   ///        should be called.
+  /// \param WithMemBarrier true if memory fences are required to keep memory
+  ///        accesses from being moved out of the region by optimizations.
   ///
   /// \return The insertion point after the region

@@ -381,7 +386,7 @@
   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
                        FinalizeCallbackTy FiniCB, bool Conditional = false,
-                       bool HasFinalize = true);
+                       bool HasFinalize = true, bool WithMemBarrier = false);

   /// Get the platform-specific name separator.
   /// \param Parts different parts of the final name that needs separation
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -814,13 +814,14 @@
   Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

   return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
-                              /*Conditional*/ false, /*hasFinalize*/ true);
+                              /*Conditional*/ false, /*hasFinalize*/ true,
+                              /*WithMemBarrier=*/true);
 }

 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
     Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
     BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
-    bool HasFinalize) {
+    bool HasFinalize, bool WithMemBarrier) {

   if (HasFinalize)
     FinalizationStack.push_back({FiniCB, OMPD, /*IsCancellable*/ false});
@@ -837,6 +838,8 @@
   Builder.SetInsertPoint(EntryBB->getTerminator());
   emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
+  if (WithMemBarrier)
+    Builder.CreateFence(AtomicOrdering::Acquire);

   // generate body
   BodyGenCB(/* AllocaIP */ InsertPointTy(),
@@ -861,7 +864,7 @@
     assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
            FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
            "Unexpected control flow graph state!!");
-    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
+    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize, WithMemBarrier);
     assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
            "Unexpected Control Flow State!");
     MergeBlockIntoPredecessor(FiniBB);
@@ -919,7 +922,7 @@
 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
     omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
-    bool HasFinalize) {
+    bool HasFinalize, bool WithMemBarrier) {

   Builder.restoreIP(FinIP);

@@ -940,6 +943,8 @@
     Builder.SetInsertPoint(FiniBBTI);
   }

+  if (WithMemBarrier)
+    Builder.CreateFence(AtomicOrdering::Release);
   // place the Exitcall as last instruction before Finalization block terminator
   ExitCall->removeFromParent();
   Builder.Insert(ExitCall);
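For illustration only (not part of the patch): a minimal sketch of the lowering this change produces for a critical construct. The function name, critical-section name, and global names below are hypothetical, and the IR in the comments is a simplified rendering of the FileCheck expectations added above.

// example.cpp -- hypothetical input, e.g. clang -fopenmp -emit-llvm -S example.cpp
int Counter = 0;

void bump() {
#pragma omp critical(update)
  ++Counter;
}

// Expected IR shape with this patch (value names simplified):
//   call void @__kmpc_critical(%struct.ident_t* @loc, i32 %gtid, [8 x i32]* @.gomp_critical_user_update.var)
//   fence acquire        ; new: keeps the load of Counter inside the region
//   %0 = load i32, i32* @Counter
//   %inc = add nsw i32 %0, 1
//   store i32 %inc, i32* @Counter
//   fence release        ; new: keeps the store to Counter inside the region
//   call void @__kmpc_end_critical(%struct.ident_t* @loc, i32 %gtid, [8 x i32]* @.gomp_critical_user_update.var)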