diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -188,7 +188,9 @@ ///} -private: + /// Return the insertion point used by the underlying IRBuilder. + InsertPointTy getInsertionPoint() { return Builder.saveIP(); } + /// Update the internal location to \p Loc. bool updateToLocation(const LocationDescription &Loc) { Builder.restoreIP(Loc.IP); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -201,6 +201,49 @@ return Changed; } + static Value *combinedIdentStruct(Value *Ident0, Value *Ident1, + bool GlobalOnly) { + // TODO: Figure out how to actually combine multiple debug locations. For + // now we just keep the first we find. + if (Ident0) + return Ident0; + if (!GlobalOnly || isa<GlobalValue>(Ident1)) + return Ident1; + return nullptr; + } + + /// Return a `struct ident_t*` value that represents the ones used in the + /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not + /// return a local `struct ident_t*`. For now, if we cannot find a suitable + /// return value we create one from scratch. We also do not yet combine + /// information, e.g., the source locations, see combinedIdentStruct. + Value *getCombinedIdentFromCallUsesIn(RuntimeFunctionInfo &RFI, Function &F, + bool GlobalOnly) { + Value *Ident = nullptr; + auto CombineIdentStruct = [&](Use &U, Function &Caller) { + CallInst *CI = getCallIfRegularCall(U, &RFI); + if (!CI || &F != &Caller) + return false; + Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), + /* GlobalOnly */ true); + return false; + }; + RFI.foreachUse(CombineIdentStruct); + + if (!Ident) { + // The IRBuilder uses the insertion block to get to the module, this is + // unfortunate but we work around it for now. 
+ if (!OMPBuilder.getInsertionPoint().getBlock()) + OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( + &F.getEntryBlock(), F.getEntryBlock().begin())); + // Create a fallback location if none was found. + // TODO: Use the debug locations of the calls instead. + Constant *Loc = OMPBuilder.getOrCreateDefaultSrcLocStr(); + Ident = OMPBuilder.getOrCreateIdent(Loc); + } + return Ident; + } + /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or /// \p ReplVal if given. bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, @@ -227,6 +270,15 @@ return false; } + // If we use a call as a replacement value we need to make sure the ident is + // valid at the new location. For now we just pick a global one, either + // existing and used by one of the calls, or created from scratch. + if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { + Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, + /* GlobalOnly */ true); + CI->setArgOperand(0, Ident); + } + bool Changed = false; auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { CallInst *CI = getCallIfRegularCall(U, &RFI); diff --git a/llvm/test/Transforms/OpenMP/gtid.ll b/llvm/test/Transforms/OpenMP/gtid.ll --- a/llvm/test/Transforms/OpenMP/gtid.ll +++ b/llvm/test/Transforms/OpenMP/gtid.ll @@ -1,12 +1,27 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes ; RUN: opt -openmpopt -S < %s | FileCheck %s ; RUN: opt -passes=openmpopt -S < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" %struct.ident_t = type { i32, i32, i32, i32, i8* } -@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8 -@.str = private unnamed_addr constant [23 x i8] 
c";unknown;unknown;0;0;;\00", align 1 +@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +@1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 +@2 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +@.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 +@.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 + +; UTC_ARGS: --disable +; CHECK-DAG: @0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +; CHECK-DAG: @1 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str1, i32 0, i32 0) }, align 8 +; CHECK-DAG: @2 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str2, i32 0, i32 0) }, align 8 +; CHECK-DAG: @.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +; CHECK-DAG: @.str1 = private unnamed_addr constant [23 x i8] c";file001;loc0001;0;0;;\00", align 1 +; CHECK-DAG: @.str2 = private unnamed_addr constant [23 x i8] c";file002;loc0002;0;0;;\00", align 1 +; CHECK-DAG: @3 = private unnamed_addr global %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8 +; UTC_ARGS: --enable + declare i32 @__kmpc_global_thread_num(%struct.ident_t*) declare void @useI32(i32) @@ -84,3 +99,68 @@ call void @useI32(i32 %c2) ret void } + + +define 
void @local_and_global_gtid_calls() { +; CHECK-LABEL: define {{[^@]+}}@local_and_global_gtid_calls() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: ret void +; +entry: + %.kmpc_loc.addr = alloca %struct.ident_t, align 8 + %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @1) + %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + call void @useI32(i32 %tid0) + call void @useI32(i32 %tid1) + call void @useI32(i32 %tid2) + %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* @2) + %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr) + call void @useI32(i32 %tid3) + call void @useI32(i32 %tid4) + call void @useI32(i32 %tid5) + ret void +} + +define void @local_gtid_calls_only() { +; CHECK-LABEL: define {{[^@]+}}@local_gtid_calls_only() +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TID5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3) +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR1:%.*]] = alloca [[STRUCT_IDENT_T:%.*]], align 8 +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR2:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 +; CHECK-NEXT: [[DOTKMPC_LOC_ADDR3:%.*]] = alloca [[STRUCT_IDENT_T]], align 8 +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; 
CHECK-NEXT: call void @useI32(i32 [[TID5]]) +; CHECK-NEXT: ret void +; +entry: + %.kmpc_loc.addr1 = alloca %struct.ident_t, align 8 + %.kmpc_loc.addr2 = alloca %struct.ident_t, align 8 + %.kmpc_loc.addr3 = alloca %struct.ident_t, align 8 + %tid0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1) + %tid1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2) + %tid2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3) + call void @useI32(i32 %tid0) + call void @useI32(i32 %tid1) + call void @useI32(i32 %tid2) + %tid3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr1) + %tid4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr2) + %tid5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* %.kmpc_loc.addr3) + call void @useI32(i32 %tid3) + call void @useI32(i32 %tid4) + call void @useI32(i32 %tid5) + ret void +}