diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -153,6 +153,7 @@ Changed |= deduplicateRuntimeCalls(); Changed |= deleteParallelRegions(); + Changed |= analysisConstantThreadNum(); return Changed; } @@ -384,6 +385,33 @@ return Changed; } + bool analysisConstantThreadNum() { + for (Function *F : SCC) { + auto &RFI = RFIs[OMPRTL___kmpc_push_num_threads]; + auto *UV = RFI.getUseVector(*F); + if (UV) { + for (Use *U : *UV) { + if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { + auto *Arg = CI->getArgOperand(RFI.getNumArgs() - 1); + if (ConstantInt *C = dyn_cast(Arg)) { + + auto Remark = [&](OptimizationRemarkAnalysis ORA) { + return ORA << "Use of OpenMP parallel region with a constant (" + << ore::NV("ConstantThreadNumber", C) + << ") number of threads. "; + }; + emitRemark(CI, + "OpenMPAnalysisConstantThreadCount", Remark); + + } + } + } + } + } + + return false; + } + /// Collect arguments that represent the global thread id in \p GTIdArgs. 
void collectGlobalThreadIdArguments(SmallSetVector >IdArgs) {
     // TODO: Below we basically perform a fixpoint iteration with a pessimistic
diff --git a/llvm/test/Transforms/OpenMP/constant_thread_count_analysis.ll b/llvm/test/Transforms/OpenMP/constant_thread_count_analysis.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/OpenMP/constant_thread_count_analysis.ll
@@ -0,0 +1,123 @@
+; RUN: opt -openmpopt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=openmpopt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
+; ModuleID = 'constant_thread_count_analysis.ll'
+source_filename = "constant_thread_count_analysis.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+%struct.ident_t = type { i32, i32, i32, i32, i8* }
+
+
+@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
+@0 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
+
+; Only the call in @constant passes a constant thread count. The call in
+; @variable (source location 11:5) must not be reported, whatever the order.
+; CHECK-NOT: remark: constant_thread_count_analysis.c:11:5
+; CHECK: remark: constant_thread_count_analysis.c:6:5: Use of OpenMP parallel region with a constant (2) number of threads.
+; CHECK-NOT: remark: constant_thread_count_analysis.c:11:5
+define dso_local void @constant() local_unnamed_addr !dbg !13 {
+  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @0, i32 %1, i32 2), !dbg !16
+  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)), !dbg !16
+  ret void, !dbg !17
+}
+
+define void @variable(i32 %0) local_unnamed_addr !dbg !18 {
+  %2 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
+  call void @__kmpc_push_num_threads(%struct.ident_t* nonnull @0, i32 %2, i32 %0), !dbg !24
+  call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* nonnull @0, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)), !dbg !24
+  ret void, !dbg !25
+}
+
+define void @.omp_outlined._debug__() !dbg !26 {
+  call void (...) @do_work(), !dbg !36
+  ret void, !dbg !38
+}
+
+define void @.omp_outlined.(i32* %0, i32* %1) !dbg !39 {
+  call void @.omp_outlined._debug__(), !dbg !43
+  ret void, !dbg !43
+}
+
+define void @.omp_outlined._debug__.1() unnamed_addr !dbg !44 {
+  call void (...) @do_work(), !dbg !48
+  ret void, !dbg !50
+}
+
+define void @.omp_outlined..2(i32* noalias nocapture readnone %0, i32* noalias nocapture readnone %1) !dbg !51 {
+  call void @.omp_outlined._debug__.1(), !dbg !55
+  ret void, !dbg !55
+}
+
+declare !dbg !4 void @do_work(...)
+
+declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
+
+declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32)
+
+declare !callback !56 void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...)
+ +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8, !9, !10, !11} +!llvm.ident = !{!12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 10.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "constant_thread_count_analysis.c", directory: "/tmp") +!2 = !{} +!3 = !{!4} +!4 = !DISubprogram(name: "do_work", scope: !1, file: !1, line: 3, type: !5, spFlags: DISPFlagOptimized, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null, null} +!7 = !{i32 7, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 7, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{!"clang version 10.0.0 "} +!13 = distinct !DISubprogram(name: "constant", scope: !1, file: !1, line: 5, type: !14, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2) +!14 = !DISubroutineType(types: !15) +!15 = !{null} +!16 = !DILocation(line: 6, column: 5, scope: !13) +!17 = !DILocation(line: 8, column: 1, scope: !13) +!18 = distinct !DISubprogram(name: "variable", scope: !1, file: !1, line: 10, type: !19, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !22) +!19 = !DISubroutineType(types: !20) +!20 = !{null, !21} +!21 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!22 = !{!23} +!23 = !DILocalVariable(name: "n", arg: 1, scope: !18, file: !1, line: 10, type: !21) +!24 = !DILocation(line: 11, column: 5, scope: !18) +!25 = !DILocation(line: 13, column: 1, scope: !18) +!26 = distinct !DISubprogram(name: ".omp_outlined._debug__", scope: !1, file: !1, line: 7, type: !27, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | 
DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !33) +!27 = !DISubroutineType(types: !28) +!28 = !{null, !29, !29} +!29 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !30) +!30 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !31) +!31 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !32, size: 64) +!32 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !21) +!33 = !{!34, !35} +!34 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !26, type: !29, flags: DIFlagArtificial) +!35 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !26, type: !29, flags: DIFlagArtificial) +!36 = !DILocation(line: 7, column: 7, scope: !37) +!37 = distinct !DILexicalBlock(scope: !26, file: !1, line: 7, column: 5) +!38 = !DILocation(line: 7, column: 18, scope: !26) +!39 = distinct !DISubprogram(name: ".omp_outlined.", scope: !1, file: !1, line: 7, type: !27, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !40) +!40 = !{!41, !42} +!41 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !39, type: !29, flags: DIFlagArtificial) +!42 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !39, type: !29, flags: DIFlagArtificial) +!43 = !DILocation(line: 7, column: 5, scope: !39) +!44 = distinct !DISubprogram(name: ".omp_outlined._debug__.1", scope: !1, file: !1, line: 12, type: !27, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !45) +!45 = !{!46, !47} +!46 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !44, type: !29, flags: DIFlagArtificial) +!47 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !44, type: !29, flags: DIFlagArtificial) +!48 = !DILocation(line: 12, column: 7, scope: !49) +!49 = distinct !DILexicalBlock(scope: !44, file: !1, line: 12, column: 5) +!50 = !DILocation(line: 12, column: 18, scope: !44) 
+!51 = distinct !DISubprogram(name: ".omp_outlined..2", scope: !1, file: !1, line: 12, type: !27, scopeLine: 12, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !52) +!52 = !{!53, !54} +!53 = !DILocalVariable(name: ".global_tid.", arg: 1, scope: !51, type: !29, flags: DIFlagArtificial) +!54 = !DILocalVariable(name: ".bound_tid.", arg: 2, scope: !51, type: !29, flags: DIFlagArtificial) +!55 = !DILocation(line: 12, column: 5, scope: !51) +!56 = !{!57} +!57 = !{i64 2, i64 -1, i64 -1, i1 true} +; ModuleID = 'constant_thread_count_analysis.ll'