diff --git a/clang/test/OpenMP/PR44893.c b/clang/test/OpenMP/PR44893.c new file mode 100644 --- /dev/null +++ b/clang/test/OpenMP/PR44893.c @@ -0,0 +1,15 @@ +// RUN: %clang -fopenmp -O -g -x c %s -c -disable-output + +// Do not crash ;) + +void foo() +{ +#pragma omp critical + ; +} + +void bar() +{ + foo(); + foo(); +} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -188,7 +188,9 @@ ///} -private: + /// Return the insertion point used by the underlying IRBuilder. + InsertPointTy getInsertionPoint() { return Builder.saveIP(); } + /// Update the internal location to \p Loc. bool updateToLocation(const LocationDescription &Loc) { Builder.restoreIP(Loc.IP); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -201,6 +201,49 @@ return Changed; } + static Value *combinedIdentStruct(Value *Ident0, Value *Ident1, + bool GlobalOnly) { + // TODO: Figure out how to actually combine multiple debug locations. For + // now we just keep the first we find. + if (Ident0) + return Ident0; + if (!GlobalOnly || isa<GlobalValue>(Ident1)) + return Ident1; + return nullptr; + } + + /// Return a `struct ident_t*` value that represents the ones used in the + /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not + /// return a local `struct ident_t*`. For now, if we cannot find a suitable + /// return value we create one from scratch. We also do not yet combine + /// information, e.g., the source locations, see combinedIdentStruct. 
+ Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F, + bool GlobalOnly) { + Value *Ident = nullptr; + auto CombineIdentStruct = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || &F != &Caller) + return false; + Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), + GlobalOnly); + return false; + }; + RFI.foreachUse(CombineIdentStruct); + + if (!Ident) { + // The IRBuilder uses the insertion block to get to the module, this is + // unfortunate but we work around it for now. + if (!OMPBuilder.getInsertionPoint().getBlock()) + OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( + &F.getEntryBlock(), F.getEntryBlock().begin())); + // Create a fallback location if none was found. + // TODO: Use the debug locations of the calls instead. + Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr(); + Ident = OMPBuilder.getOrCreateIdent(Loc); + } + return Ident; + } + /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or /// \p ReplVal if given. bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, @@ -216,9 +259,25 @@ assert((!ReplVal || (isa<Argument>(ReplVal) && cast<Argument>(ReplVal)->getParent() == &F)) && "Unexpected replacement value!"); + + // TODO: Use dominance to find a good position instead. + auto CanBeMoved = [](CallBase &CB) { + unsigned NumArgs = CB.getNumArgOperands(); + if (NumArgs == 0) + return true; + if (CB.getArgOperand(0)->getType() != IdentPtr) + return false; + for (unsigned u = 1; u < NumArgs; ++u) + if (isa<Instruction>(CB.getArgOperand(u))) + return false; + return true; + }; + if (!ReplVal) { for (Use *U : Uses) if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + if (!CanBeMoved(*CI)) + continue; CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); ReplVal = CI; break; @@ -227,6 +286,18 @@ return false; } + // If we use a call as a replacement value we need to make sure the ident is + // valid at the new location. 
For now we just pick a global one, either + // existing and used by one of the calls, or created from scratch. + if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { + if (CI->getNumArgOperands() > 0 && + CI->getArgOperand(0)->getType() == IdentPtr) { + Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, + /* GlobalOnly */ true); + CI->setArgOperand(0, Ident); + } + } + bool Changed = false; auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { CallInst *CI = getCallIfRegularCall(U, &RFI); diff --git a/llvm/test/Transforms/OpenMP/deduplication.ll b/llvm/test/Transforms/OpenMP/deduplication.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/OpenMP/deduplication.ll @@ -0,0 +1,223 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -openmpopt -S < %s | FileCheck %s +; RUN: opt -passes=openmpopt -S < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 +@.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 + +; UTC_ARGS: --disable +; CHECK-DAG: @0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* 
@.str0, i32 0, i32 0) }, align 8 +; CHECK-DAG: @1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 +; CHECK-DAG: @2 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 +; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 +; CHECK-DAG: @3 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +; UTC_ARGS: --enable + + +declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +declare void @useI32(i32) + +define void @external(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@external +; CHECK-SAME: (i1 [[C:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] +; CHECK: t: +; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: e: +; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @internal(i32 0, i32 [[C2]]) +; CHECK-NEXT: call void @useI32(i32 [[C2]]) +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %t, label %e +t: + %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 %c0, i32 %c0) + call void @useI32(i32 %c0) + br label %m +e: + %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void 
@internal(i32 %c1, i32 %c1) + call void @useI32(i32 %c1) + br label %m +m: + %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @internal(i32 0, i32 %c2) + call void @useI32(i32 %c2) + ret void +} + +define internal void @internal(i32 %not_gtid, i32 %gtid) { +; CHECK-LABEL: define {{[^@]+}}@internal +; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]]) +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]] +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] +; CHECK: t: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: call void @external(i1 [[C]]) +; CHECK-NEXT: br label [[M:%.*]] +; CHECK: e: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: br label [[M]] +; CHECK: m: +; CHECK-NEXT: call void @useI32(i32 [[GTID]]) +; CHECK-NEXT: ret void +; +entry: + %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + %c = icmp eq i32 %cc, %gtid + br i1 %c, label %t, label %e +t: + %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c0) + call void @external(i1 %c) + br label %m +e: + %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c1) + br label %m +m: + %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) + call void @useI32(i32 %c2) + ret void +} + + +define void @local_and_global_gtid_calls() { +; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: ret 
void +; +entry: + %.kmpc_loc.addr = alloca %struct.ident_t, align 8 + %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) + %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + call void @useI32(i32 %tid0) + call void @useI32(i32 %tid1) + call void @useI32(i32 %tid2) + %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) + %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + call void @useI32(i32 %tid3) + call void @useI32(i32 %tid4) + call void @useI32(i32 %tid5) + ret void +} + +define void @local_gtid_calls_only() { +; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: ret void +; +entry: + %.kmpc_loc.addr1 = alloca %struct.ident_t, align 8 + %.kmpc_loc.addr2 = alloca %struct.ident_t, align 8 + %.kmpc_loc.addr3 = alloca %struct.ident_t, align 8 + %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1) + %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2) + %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3) + call void @useI32(i32 %tid0) + call void @useI32(i32 %tid1) + call void @useI32(i32 %tid2) + %tid3 = call i32 
@__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1) + %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2) + %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3) + call void @useI32(i32 %tid3) + call void @useI32(i32 %tid4) + call void @useI32(i32 %tid5) + ret void +} + +declare i32 @omp_get_level() +define void @local_and_global_glvl_calls() { +; CHECK-LABEL: define {{[^@]+}}@local_and_global_glvl_calls() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TID5:%.*]] = call i32 @omp_get_level() +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: ret void +; +entry: + %tid0 = call i32 @omp_get_level() + %tid1 = call i32 @omp_get_level() + %tid2 = call i32 @omp_get_level() + call void @useI32(i32 %tid0) + call void @useI32(i32 %tid1) + call void @useI32(i32 %tid2) + %tid3 = call i32 @omp_get_level() + %tid4 = call i32 @omp_get_level() + %tid5 = call i32 @omp_get_level() + call void @useI32(i32 %tid3) + call void @useI32(i32 %tid4) + call void @useI32(i32 %tid5) + ret void +} + +define void @local_glvl_calls_only() { +; CHECK-LABEL: define {{[^@]+}}@local_glvl_calls_only() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TID5:%.*]] = call i32 @omp_get_level() +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: ret void +; +entry: + %tid0 = call i32 @omp_get_level() + %tid1 = call i32 @omp_get_level() + %tid2 = call i32 @omp_get_level() + call void @useI32(i32 %tid0) + call void @useI32(i32 %tid1) + call void @useI32(i32 %tid2) + %tid3 = call i32 
@omp_get_level() + %tid4 = call i32 @omp_get_level() + %tid5 = call i32 @omp_get_level() + call void @useI32(i32 %tid3) + call void @useI32(i32 %tid4) + call void @useI32(i32 %tid5) + ret void +} diff --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll deleted file mode 100644 --- a/llvm/test/Transforms/OpenMP/gtid.ll +++ /dev/null @@ -1,86 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature -; RUN: opt -openmpopt -S < %s | FileCheck %s -; RUN: opt -passes=openmpopt -S < %s | FileCheck %s -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" - -%struct.ident_t = type { i32, i32, i32, i32, i8* } - -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 -@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 - -declare i32 @__kmpc_global_thread_num(%struct.ident_t*) -declare void @useI32(i32) - -define void @external(i1 %c) { -; CHECK-LABEL: define {{[^@]+}}@external -; CHECK-SAME: (i1 [[C:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) -; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] -; CHECK: t: -; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) -; CHECK-NEXT: call void @useI32(i32 [[C2]]) -; CHECK-NEXT: br label [[M:%.*]] -; CHECK: e: -; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]]) -; CHECK-NEXT: call void @useI32(i32 [[C2]]) -; CHECK-NEXT: br label [[M]] -; CHECK: m: -; CHECK-NEXT: call void @internal(i32 0, i32 [[C2]]) -; CHECK-NEXT: call void @useI32(i32 [[C2]]) -; CHECK-NEXT: ret void -; -entry: - br i1 %c, label %t, label %e -t: - %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - call void @internal(i32 %c0, i32 %c0) - call void @useI32(i32 %c0) - br label %m 
-e: - %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - call void @internal(i32 %c1, i32 %c1) - call void @useI32(i32 %c1) - br label %m -m: - %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - call void @internal(i32 0, i32 %c2) - call void @useI32(i32 %c2) - ret void -} - -define internal void @internal(i32 %not_gtid, i32 %gtid) { -; CHECK-LABEL: define {{[^@]+}}@internal -; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]]) -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]] -; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]] -; CHECK: t: -; CHECK-NEXT: call void @useI32(i32 [[GTID]]) -; CHECK-NEXT: call void @external(i1 [[C]]) -; CHECK-NEXT: br label [[M:%.*]] -; CHECK: e: -; CHECK-NEXT: call void @useI32(i32 [[GTID]]) -; CHECK-NEXT: br label [[M]] -; CHECK: m: -; CHECK-NEXT: call void @useI32(i32 [[GTID]]) -; CHECK-NEXT: ret void -; -entry: - %cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - %c = icmp eq i32 %cc, %gtid - br i1 %c, label %t, label %e -t: - %c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - call void @useI32(i32 %c0) - call void @external(i1 %c) - br label %m -e: - %c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - call void @useI32(i32 %c1) - br label %m -m: - %c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0) - call void @useI32(i32 %c2) - ret void -}