diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1813,8 +1813,53 @@
   return true;
 }
 
+/// Map a memory intrinsic ID to the function name printed in the "memsize"
+/// remarks. Only the IDs listed in the switch are supported; any other ID
+/// reaches llvm_unreachable.
+static const char *memFuncName(Intrinsic::ID ID) {
+  switch (ID) {
+  case Intrinsic::memcpy_inline:
+    return "memcpy_inline";
+  case Intrinsic::memcpy:
+    return "memcpy";
+  case Intrinsic::memset:
+    return "memset";
+  case Intrinsic::memmove:
+    return "memmove";
+  case Intrinsic::memmove_element_unordered_atomic:
+    return "memmove_element_unordered_atomic";
+  case Intrinsic::memcpy_element_unordered_atomic:
+    return "memcpy_element_unordered_atomic";
+  case Intrinsic::memset_element_unordered_atomic:
+    return "memset_element_unordered_atomic";
+  default:
+    llvm_unreachable("unknown mem intrinsic");
+  }
+}
+
 bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
                                            MachineIRBuilder &MIRBuilder) {
+  // Emit a "memsize" analysis remark for memory intrinsics: the byte count
+  // when the length operand is a constant, otherwise a "dynamic size" note.
+  // NOTE(review): the dyn_cast target types were missing from this patch text;
+  // AnyMemIntrinsic is assumed because memFuncName() also names the
+  // element-unordered-atomic variants -- confirm against the original change.
+  if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
+    if (auto *ConstantSize = dyn_cast<ConstantInt>(MI->getLength())) {
+      ORE->emit([&]() {
+        using namespace ore;
+        return OptimizationRemarkAnalysis("memsize", "static", &CI)
+               << NV("Function", memFuncName(ID)) << " of "
+               << NV("Size", ConstantSize->getZExtValue()) << " bytes";
+      });
+    } else {
+      ORE->emit([&]() {
+        using namespace ore;
+        return OptimizationRemarkAnalysis("memsize", "dynamic", &CI)
+               << NV("Function", memFuncName(ID)) << " with dynamic size";
+      });
+    }
+  }
 
   // If this is a simple intrinsic (that is, we just need to add a def of
   // a vreg, and uses for each arg operand, then translate it.
@@ -2244,6 +2281,41 @@
     Args.push_back(getOrCreateVRegs(*Arg));
   }
 
+  // Emit a "memsize" analysis remark for direct calls to the mem* library
+  // functions and their fortified __*_chk variants, reading the byte count
+  // from the third argument.
+  if (auto *CI = dyn_cast<CallInst>(&CB)) {
+    if (auto *F = CI->getCalledFunction()) {
+      // Callees are matched by name only, so check that a third argument
+      // exists before reading it: getArgOperand(2) is out of range for a
+      // same-named function called with fewer arguments.
+      if (F->hasName() && CI->arg_size() > 2) {
+        if (auto *Name = llvm::StringSwitch<const char *>(F->getName())
+                             .Cases("__memcpy_chk", "memcpy", "memcpy")
+                             .Cases("__mempcpy_chk", "mempcpy", "mempcpy")
+                             .Cases("__memset_chk", "memset", "memset")
+                             .Cases("__memmove_chk", "memmove", "memmove")
+                             .Default(nullptr)) {
+          if (auto *ConstantSize =
+                  dyn_cast<ConstantInt>(CI->getArgOperand(2))) {
+            ORE->emit([&]() {
+              using namespace ore;
+              return OptimizationRemarkAnalysis("memsize", "static", &CB)
+                     << NV("Function", Name) << " of "
+                     << NV("Size", ConstantSize->getZExtValue()) << " bytes";
+            });
+          } else {
+            ORE->emit([&]() {
+              using namespace ore;
+              return OptimizationRemarkAnalysis("memsize", "dynamic", &CB)
+                     << NV("Function", Name) << " with dynamic size";
+            });
+          }
+        }
+      }
+    }
+  }
+
   // We don't set HasCalls on MFI here yet because call lowering may decide to
   // optimize into tail calls. Instead, we defer that to selection where a final
   // scan is done to check if any instructions are calls.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -6650,6 +6651,7 @@
                                 bool isVol, bool AlwaysInline, bool isTailCall,
                                 MachinePointerInfo DstPtrInfo,
                                 MachinePointerInfo SrcPtrInfo) {
+  const Function &F = getMachineFunction().getFunction();
   // Check to see if we should lower the memcpy to loads and stores first.
   // For cases within the target-specified limits, this is the best choice.
ConstantSDNode *ConstantSize = dyn_cast(Size); @@ -6658,11 +6660,26 @@ if (ConstantSize->isNullValue()) return Chain; + ORE->emit([&]() { + using namespace ore; + return OptimizationRemarkAnalysis("memsize", "static", dl.getDebugLoc(), + &F.getEntryBlock()) + << NV("Function", "memcpy") << " of " + << NV("Size", ConstantSize->getConstantIntValue()) << " bytes"; + }); + SDValue Result = getMemcpyLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; + } else { + ORE->emit([&]() { + using namespace ore; + return OptimizationRemarkAnalysis("memsize", "dynamic", dl.getDebugLoc(), + &F.getEntryBlock()) + << NV("Function", "memcpy") << " with dynamic size"; + }); } // Then check to see if we should lower the memcpy with target-specific @@ -6764,6 +6781,8 @@ bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + const Function &F = getMachineFunction().getFunction(); + // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. 
ConstantSDNode *ConstantSize = dyn_cast(Size); @@ -6772,11 +6791,26 @@ if (ConstantSize->isNullValue()) return Chain; + ORE->emit([&]() { + using namespace ore; + return OptimizationRemarkAnalysis("memsize", "static", dl.getDebugLoc(), + &F.getEntryBlock()) + << NV("Function", "memmove") << " of " + << NV("Size", ConstantSize->getConstantIntValue()) << " bytes"; + }); + SDValue Result = getMemmoveLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; + } else { + ORE->emit([&]() { + using namespace ore; + return OptimizationRemarkAnalysis("memsize", "dynamic", dl.getDebugLoc(), + &F.getEntryBlock()) + << NV("Function", "memmove") << " with dynamic size"; + }); } // Then check to see if we should lower the memmove with target-specific @@ -6865,6 +6899,8 @@ SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { + const Function &F = getMachineFunction().getFunction(); + // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. 
ConstantSDNode *ConstantSize = dyn_cast(Size); @@ -6873,12 +6909,27 @@ if (ConstantSize->isNullValue()) return Chain; + ORE->emit([&]() { + using namespace ore; + return OptimizationRemarkAnalysis("memsize", "static", dl.getDebugLoc(), + &F.getEntryBlock()) + << NV("Function", "memset") << " of " + << NV("Size", ConstantSize->getConstantIntValue()) << " bytes"; + }); + SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, isVol, DstPtrInfo); if (Result.getNode()) return Result; + } else { + ORE->emit([&]() { + using namespace ore; + return OptimizationRemarkAnalysis("memsize", "dynamic", dl.getDebugLoc(), + &F.getEntryBlock()) + << NV("Function", "memset") << " with dynamic size"; + }); } // Then check to see if we should lower the memset with target-specific diff --git a/llvm/test/CodeGen/AArch64/memsize-remarks.ll b/llvm/test/CodeGen/AArch64/memsize-remarks.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/memsize-remarks.ll @@ -0,0 +1,208 @@ +; RUN: llc %s -pass-remarks-analysis=memsize -o /dev/null 2>&1 | FileCheck %s --check-prefix=SDAG --implicit-check-not=SDAG +; RUN: llc %s -pass-remarks-analysis=memsize -global-isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=GISEL --implicit-check-not=GISEL + +source_filename = "memsize.c" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-ios7.0.0" + +declare i8* @__memmove_chk(i8*, i8*, i64, i64) #1 + +declare i8* @__memcpy_chk(i8*, i8*, i64, i64) #1 + +declare i8* @__memset_chk(i8*, i32, i64, i64) #1 + +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) #2 + +define void @memcpy_dynamic(i8* %d, i8* %s, i64 %l) #0 !dbg !14 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !16 +; SDAG: remark: memsize.c:4:3: memcpy with dynamic size +; GISEL: remark: memsize.c:4:3: memcpy with dynamic size + %call = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 %l, 
i64 %0) #4, !dbg !17 + ret void, !dbg !18 +} + +define void @memcpy_single(i8* %d, i8* %s, i64 %l) #0 !dbg !23 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !24 +; SDAG: remark: memsize.c:10:3: memcpy of 1 bytes +; GISEL: remark: memsize.c:10:3: memcpy of 1 bytes + %call = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 1, i64 %0) #4, !dbg !25 + ret void, !dbg !26 +} + +define void @memcpy_static(i8* %d, i8* %s, i64 %l) #0 !dbg !27 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !28 +; SDAG: remark: memsize.c:13:3: memcpy of 100 bytes +; GISEL: remark: memsize.c:13:3: memcpy of 100 bytes + %call = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 100, i64 %0) #4, !dbg !29 + ret void, !dbg !30 +} + +define void @memcpy_huge(i8* %d, i8* %s, i64 %l) #0 !dbg !31 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !32 +; SDAG: remark: memsize.c:16:3: memcpy of 100000 bytes +; GISEL: remark: memsize.c:16:3: memcpy of 100000 bytes + %call = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 100000, i64 %0) #4, !dbg !33 + ret void, !dbg !34 +} + +define void @memmove_dynamic(i8* %d, i8* %s, i64 %l) #0 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false) +; SDAG: remark: :0:0: memmove with dynamic size +; GISEL: remark: :0:0: memmove with dynamic size + %call = call i8* @__memmove_chk(i8* %d, i8* %s, i64 %l, i64 %0) #4 + ret void +} + +define void @memmove_single(i8* %d, i8* %s, i64 %l) #0 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false) +; SDAG: remark: :0:0: memmove of 1 bytes +; GISEL: remark: :0:0: memmove of 1 bytes + %call = call i8* @__memmove_chk(i8* %d, i8* %s, i64 1, i64 %0) #4 + ret void +} + +define void @memmove_static(i8* %d, i8* %s, i64 %l) #0 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false) +; SDAG: remark: :0:0: memmove of 100 
bytes +; GISEL: remark: :0:0: memmove of 100 bytes + %call = call i8* @__memmove_chk(i8* %d, i8* %s, i64 100, i64 %0) #4 + ret void +} + +define void @memmove_huge(i8* %d, i8* %s, i64 %l) #0 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false) +; SDAG: remark: :0:0: memmove of 100000 bytes +; GISEL: remark: :0:0: memmove of 100000 bytes + %call = call i8* @__memmove_chk(i8* %d, i8* %s, i64 100000, i64 %0) #4 + ret void +} + +define void @memset_dynamic(i8* %d, i64 %l) #0 !dbg !38 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !39 +; SDAG: remark: memsize.c:22:3: memset with dynamic size +; GISEL: remark: memsize.c:22:3: memset with dynamic size + %call = call i8* @__memset_chk(i8* %d, i32 0, i64 %l, i64 %0) #4, !dbg !40 + ret void, !dbg !41 +} + +define void @memset_single(i8* %d, i64 %l) #0 !dbg !46 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !47 +; SDAG: remark: memsize.c:28:3: memset of 1 bytes +; GISEL: remark: memsize.c:28:3: memset of 1 bytes + %call = call i8* @__memset_chk(i8* %d, i32 0, i64 1, i64 %0) #4, !dbg !48 + ret void, !dbg !49 +} + +define void @memset_static(i8* %d, i64 %l) #0 !dbg !50 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !51 +; SDAG: remark: memsize.c:31:3: memset of 100 bytes +; GISEL: remark: memsize.c:31:3: memset of 100 bytes + %call = call i8* @__memset_chk(i8* %d, i32 0, i64 100, i64 %0) #4, !dbg !52 + ret void, !dbg !53 +} + +define void @memset_huge(i8* %d, i64 %l) #0 !dbg !54 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !55 +; SDAG: remark: memsize.c:34:3: memset of 100000 bytes +; GISEL: remark: memsize.c:34:3: memset of 100000 bytes + %call = call i8* @__memset_chk(i8* %d, i32 0, i64 100000, i64 %0) #4, !dbg !56 + ret void, !dbg !57 +} + +define void @memset_empty(i8* %d, i64 %l) #0 !dbg !42 { 
+entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !43 +; GISEL: remark: memsize.c:25:3: memset of 0 bytes + %call = call i8* @__memset_chk(i8* %d, i32 0, i64 0, i64 %0) #4, !dbg !44 + ret void, !dbg !45 +} + +define void @memcpy_empty(i8* %d, i8* %s, i64 %l) #0 !dbg !19 { +entry: + %0 = call i64 @llvm.objectsize.i64.p0i8(i8* %d, i1 false, i1 true, i1 false), !dbg !20 +; GISEL: remark: memsize.c:7:3: memcpy of 0 bytes + %call = call i8* @__memcpy_chk(i8* %d, i8* %s, i64 0, i64 %0) #4, !dbg !21 + ret void, !dbg !22 +} + +attributes #0 = { noinline nounwind ssp uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-a7" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" } +attributes #1 = { nounwind "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-a7" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" } +attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #3 = { argmemonly nofree nosync nounwind willreturn } +attributes #4 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9} +!llvm.dbg.cu = !{!10} +!llvm.ident = !{!13} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 12, i32 0]} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i32 1, !"wchar_size", i32 4} +!3 = !{i32 1, !"branch-target-enforcement", i32 0} +!4 = !{i32 1, !"sign-return-address", i32 0} +!5 = !{i32 1, !"sign-return-address-all", i32 0} +!6 = !{i32 1, !"sign-return-address-with-bkey", i32 0} +!7 = !{i32 7, !"PIC Level", i32 2} +!8 = !{i32 7, !"uwtable", i32 1} +!9 = !{i32 7, !"frame-pointer", i32 1} +!10 = distinct !DICompileUnit(language: DW_LANG_C99, file: !11, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !12, splitDebugInlining: false, nameTableKind: None, sysroot: "/") +!11 = 
!DIFile(filename: "memsize.c", directory: "") +!12 = !{} +!13 = !{!"clang"} +!14 = distinct !DISubprogram(name: "memcpy_dynamic", scope: !11, file: !11, line: 3, type: !15, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!15 = !DISubroutineType(types: !12) +!16 = !DILocation(line: 4, column: 36, scope: !14) +!17 = !DILocation(line: 4, column: 3, scope: !14) +!18 = !DILocation(line: 5, column: 1, scope: !14) +!19 = distinct !DISubprogram(name: "memcpy_empty", scope: !11, file: !11, line: 6, type: !15, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!20 = !DILocation(line: 7, column: 36, scope: !19) +!21 = !DILocation(line: 7, column: 3, scope: !19) +!22 = !DILocation(line: 8, column: 1, scope: !19) +!23 = distinct !DISubprogram(name: "memcpy_single", scope: !11, file: !11, line: 9, type: !15, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!24 = !DILocation(line: 10, column: 36, scope: !23) +!25 = !DILocation(line: 10, column: 3, scope: !23) +!26 = !DILocation(line: 11, column: 1, scope: !23) +!27 = distinct !DISubprogram(name: "memcpy_static", scope: !11, file: !11, line: 12, type: !15, scopeLine: 12, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!28 = !DILocation(line: 13, column: 38, scope: !27) +!29 = !DILocation(line: 13, column: 3, scope: !27) +!30 = !DILocation(line: 14, column: 1, scope: !27) +!31 = distinct !DISubprogram(name: "memcpy_huge", scope: !11, file: !11, line: 15, type: !15, scopeLine: 15, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!32 = !DILocation(line: 16, column: 41, scope: !31) +!33 = !DILocation(line: 16, column: 3, scope: !31) +!34 = !DILocation(line: 17, column: 1, scope: !31) +!35 = distinct !DISubprogram(name: "memcpy_inline", scope: !11, file: !11, line: 18, type: !15, scopeLine: 18, flags: 
DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!36 = !DILocation(line: 19, column: 3, scope: !35) +!37 = !DILocation(line: 20, column: 1, scope: !35) +!38 = distinct !DISubprogram(name: "memset_dynamic", scope: !11, file: !11, line: 21, type: !15, scopeLine: 21, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!39 = !DILocation(line: 22, column: 36, scope: !38) +!40 = !DILocation(line: 22, column: 3, scope: !38) +!41 = !DILocation(line: 23, column: 1, scope: !38) +!42 = distinct !DISubprogram(name: "memset_empty", scope: !11, file: !11, line: 24, type: !15, scopeLine: 24, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!43 = !DILocation(line: 25, column: 36, scope: !42) +!44 = !DILocation(line: 25, column: 3, scope: !42) +!45 = !DILocation(line: 26, column: 1, scope: !42) +!46 = distinct !DISubprogram(name: "memset_single", scope: !11, file: !11, line: 27, type: !15, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!47 = !DILocation(line: 28, column: 36, scope: !46) +!48 = !DILocation(line: 28, column: 3, scope: !46) +!49 = !DILocation(line: 29, column: 1, scope: !46) +!50 = distinct !DISubprogram(name: "memset_static", scope: !11, file: !11, line: 30, type: !15, scopeLine: 30, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!51 = !DILocation(line: 31, column: 38, scope: !50) +!52 = !DILocation(line: 31, column: 3, scope: !50) +!53 = !DILocation(line: 32, column: 1, scope: !50) +!54 = distinct !DISubprogram(name: "memset_huge", scope: !11, file: !11, line: 33, type: !15, scopeLine: 33, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12) +!55 = !DILocation(line: 34, column: 41, scope: !54) +!56 = !DILocation(line: 34, column: 3, scope: !54) +!57 = !DILocation(line: 35, column: 1, scope: !54) +!58 = distinct !DISubprogram(name: "auto_init", 
scope: !11, file: !11, line: 37, type: !15, scopeLine: 37, spFlags: DISPFlagDefinition, unit: !10, retainedNodes: !12)