diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -798,13 +798,6 @@ return Count; } - sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num, - uint64_t Weight = 1) { - SampleRecord S; - S.addSamples(Num, Weight); - return BodySamples[LineLocation(Index, 0)].merge(S, Weight); - } - // Accumulate all call target samples to update the body samples. void updateCallsiteSamples() { for (auto &I : BodySamples) { diff --git a/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-profgen/Inputs/fs-discriminator-probe.raw.prof @@ -0,0 +1,72 @@ +43 +650-66c:445 +650-675:409 +650-68b:175 +650-6a2:66 +685-68b:199 +685-6a2:205 +688-68b:453 +68d-6a2:782 +6b0-6b7:775 +6b0-6c0:2778 +6b0-6dc:856 +6b0-6f1:1550 +6b9-6c0:463 +6b9-6dc:122 +6b9-6f1:211 +6d4-6dc:2259 +6d4-6f1:1019 +700-71c:508 +720-734:194 +720-73c:751 +720-741:382 +720-751:1226 +722-734:306 +722-73c:2808 +722-751:790 +736-73c:315 +736-741:196 +758-76e:503 +770-77a:849 +770-798:678 +790-798:910 +79a-7ab:1478 +7b0-7b9:885 +7b0-7c3:682 +7bb-7c3:873 +800-81f:9 +824-852:9 +860-860:2542 +865-894:8 +865-8b0:2549 +865-8b5:302 +883-8b0:246 +89a-8b5:8 +27 +ffffffffffc00001->865:2868 +66c->688:458 +675->685:423 +68b->6b9:848 +6a2->79a:1086 +6b7->68d:793 +6c0->6d4:3343 +6dc->6b0:3340 +6f1->6b0:2873 +71c->736:520 +734->758:516 +73c->722:4012 +741->720:600 +751->720:2079 +76e->79a:524 +77a->7bb:879 +798->650:1117 +798->700:516 +7ab->770:1526 +7b9->790:916 +7c3->7b0:1597 +7c3->824:9 +81f->770:10 +852->89a:9 +894->800:9 +8b0->860:2831 +8b5->883:321 diff --git a/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test b/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test new file mode 100644 --- /dev/null +++ 
b/llvm/test/tools/llvm-profgen/fs-discriminator-probe.test @@ -0,0 +1,139 @@ +; RUN: llvm-profgen --unsymbolized-profile=%S/Inputs/fs-discriminator-probe.raw.prof --binary=%S/Inputs/fs-discriminator-probe.perfbin --output=%t1 +; RUN: llvm-profdata show --sample --show-sec-info-only %t1 | FileCheck %s --check-prefix=CHECK-SECTION +; RUN: llvm-profdata merge --sample %t1 -o %t2 --text +; RUN: FileCheck %s --input-file %t2 --check-prefix=CHECK + +; CHECK-SECTION: ProfileSummarySection - Offset: [[#]], Size: [[#]], Flags: {fs-discriminator} + +; CHECK: partition_pivot_first:29661:2739 +; CHECK-NEXT: 1: 2739 +; CHECK-NEXT: 2.1: 2739 +; CHECK-NEXT: 2.11265: 0 +; CHECK-NEXT: 3: 6457 +; CHECK-NEXT: 4: 508 +; CHECK-NEXT: 5.1: 508 +; CHECK-NEXT: 6.2: 508 +; CHECK-NEXT: 7.3: 2780 +; CHECK-NEXT: 8.4: 0 +; CHECK-NEXT: 9.2: 0 +; CHECK-NEXT: 10: 6457 +; CHECK-NEXT: 11: swap:508 +; CHECK-NEXT: 1.7680: 508 +; CHECK-NEXT: !CFGChecksum: 4294967295 +; CHECK-NEXT: 12: swap:6457 +; CHECK-NEXT: 1.7168: 6457 +; CHECK-NEXT: !CFGChecksum: 4294967295 +; CHECK-NEXT: !CFGChecksum: 563159988274199 +; CHECK-NEXT: main:16724:1478 +; CHECK-NEXT: 1: 1478 +; CHECK-NEXT: 2.1: 1478 +; CHECK-NEXT: 2.3585: 0 +; CHECK-NEXT: 3: 3105 +; CHECK-NEXT: 5.1: 9 +; CHECK-NEXT: 5.1537: 0 +; CHECK-NEXT: 6: 0 +; CHECK-NEXT: 7: 2859 +; CHECK-NEXT: 8.1: 9 +; CHECK-NEXT: 10.3: 0 +; CHECK-NEXT: 11.2: 0 +; CHECK-NEXT: 12: 0 +; CHECK-NEXT: 13.2: 0 +; CHECK-NEXT: 14: 3105 +; CHECK-NEXT: 15: 1567 +; CHECK-NEXT: 16: 9 +; CHECK-NEXT: 17: 0 +; CHECK-NEXT: 18: 3105 +; CHECK-NEXT: !CFGChecksum: 1126178599120658 +; CHECK-NEXT: partition_pivot_last:10497:0 +; CHECK-NEXT: 1: 0 +; CHECK-NEXT: 2.1: 0 +; CHECK-NEXT: 2.6145: 1095 +; CHECK-NEXT: 2.7681: 241 +; CHECK-NEXT: 2.8193: 0 +; CHECK-NEXT: 3: 1095 +; CHECK-NEXT: 4: 0 +; CHECK-NEXT: 4.3072: 1098 +; CHECK-NEXT: 4.12800: 0 +; CHECK-NEXT: 5: 0 +; CHECK-NEXT: 5.5632: 0 +; CHECK-NEXT: 5.6144: 1053 +; CHECK-NEXT: 6.1: 1095 +; CHECK-NEXT: 6.12801: 0 +; CHECK-NEXT: 6.13825: 241 +; 
CHECK-NEXT: 7.2: 1095 +; CHECK-NEXT: 7.7170: 241 +; CHECK-NEXT: 7.7682: 0 +; CHECK-NEXT: 8: 1095 +; CHECK-NEXT: 9: swap:1053 +; CHECK-NEXT: 1.1024: 1053 +; CHECK-NEXT: 1.4608: 0 +; CHECK-NEXT: 1.15360: 0 +; CHECK-NEXT: !CFGChecksum: 4294967295 +; CHECK-NEXT: 10: swap:1095 +; CHECK-NEXT: 1.14848: 1095 +; CHECK-NEXT: !CFGChecksum: 4294967295 +; CHECK-NEXT: !CFGChecksum: 563108639284859 +; CHECK-NEXT: quick_sort:4881:2519 +; CHECK-NEXT: 1: 2016 +; CHECK-NEXT: 1.15360: 503 +; CHECK-NEXT: 2: 503 +; CHECK-NEXT: 3: 678 +; CHECK-NEXT: 4: 503 +; CHECK-NEXT: 5: 678 +; CHECK-NEXT: !CFGChecksum: 844480566202114 + + + +; original code: +; clang -O3 -g -mllvm --enable-fs-discriminator -fdebug-info-for-profiling -fpseudo-probe-for-profiling qsort.c -o a.out +#include <stdio.h> +#include <stdlib.h> + +void swap(int *a, int *b) { + int t = *a; + *a = *b; + *b = t; +} + +int partition_pivot_last(int* array, int low, int high) { + int pivot = array[high]; + int i = low - 1; + for (int j = low; j < high; j++) + if (array[j] < pivot) + swap(&array[++i], &array[j]); + swap(&array[i + 1], &array[high]); + return (i + 1); +} + +int partition_pivot_first(int* array, int low, int high) { + int pivot = array[low]; + int i = low + 1; + for (int j = low + 1; j <= high; j++) + if (array[j] < pivot) { if (j != i) swap(&array[i], &array[j]); i++;} + swap(&array[i - 1], &array[low]); + return i - 1; +} + +void quick_sort(int* array, int low, int high, int (*partition_func)(int *, int, int)) { + if (low < high) { + int pi = (*partition_func)(array, low, high); + quick_sort(array, low, pi - 1, partition_func); + quick_sort(array, pi + 1, high, partition_func); + } +} + +int main() { + const int size = 200; + int sum = 0; + int *array = malloc(size * sizeof(int)); + for(int i = 0; i < 100 * 1000; i++) { + for(int j = 0; j < size; j++) + array[j] = j % 10 ? rand() % size: j; + int (*fptr)(int *, int, int) = i % 3 ? 
partition_pivot_last : partition_pivot_first; + quick_sort(array, 0, size - 1, fptr); + sum += array[i % size]; + } + printf("sum=%d\n", sum); + + return 0; +} diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -480,8 +480,6 @@ } void HybridPerfReader::unwindSamples() { - if (Binary->useFSDiscriminator()) - exitWithError("FS discriminator is not supported in CS profile."); VirtualUnwinder Unwinder(&SampleCounters, Binary); for (const auto &Item : AggregatedSamples) { const PerfSample *Sample = Item.first.getPtr(); diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp --- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp +++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp @@ -123,8 +123,6 @@ bool ProfileIsCS) { std::unique_ptr<ProfileGeneratorBase> Generator; if (ProfileIsCS) { - if (Binary->useFSDiscriminator()) - exitWithError("FS discriminator is not supported in CS profile."); Generator.reset(new CSProfileGenerator(Binary, SampleCounters)); } else { Generator.reset(new ProfileGenerator(Binary, SampleCounters)); @@ -140,8 +138,6 @@ bool ProfileIsCS) { std::unique_ptr<ProfileGeneratorBase> Generator; if (ProfileIsCS) { - if (Binary->useFSDiscriminator()) - exitWithError("FS discriminator is not supported in CS profile."); Generator.reset(new CSProfileGenerator(Binary, Profiles)); } else { Generator.reset(new ProfileGenerator(Binary, std::move(Profiles))); @@ -562,7 +558,8 @@ Binary->getInlineContextForProbe(Probe, FrameVec, true); FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, Count); - FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); + FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(), + Count); if (Probe->isEntry()) FunctionProfile.addHeadSamples(Count); } @@ -593,7 +590,9 @@ FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(FrameVec, 
0); FunctionProfile.addCalledTargetSamples( - FrameVec.back().Location.LineOffset, 0, CalleeName, Count); + FrameVec.back().Location.LineOffset, + FrameVec.back().Location.Discriminator, + CalleeName, Count); } } } @@ -1160,7 +1159,8 @@ // collected for non-danglie probes. This is for reporting all of the // zero count probes of the frame later. FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile); - FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count); + FunctionProfile.addBodySamples(Probe->getIndex(), Probe->getDiscriminator(), + Count); FunctionProfile.addTotalSamples(Count); if (Probe->isEntry()) { FunctionProfile.addHeadSamples(Count); @@ -1172,14 +1172,17 @@ // context id to infer caller's context id to ensure they share the // same context prefix. uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset; + uint64_t CallerDiscriminator = ContextNode->getCallSiteLoc().Discriminator; assert(CallerIndex && "Inferred caller's location index shouldn't be zero!"); + assert(!CallerDiscriminator && + "Callsite probe should not have a discriminator!"); FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode); CallerProfile.setFunctionHash(InlinerDesc->FuncHash); - CallerProfile.addBodySamples(CallerIndex, 0, Count); + CallerProfile.addBodySamples(CallerIndex, CallerDiscriminator, Count); CallerProfile.addTotalSamples(Count); - CallerProfile.addCalledTargetSamples(CallerIndex, 0, + CallerProfile.addCalledTargetSamples(CallerIndex, CallerDiscriminator, ContextNode->getFuncName(), Count); } } @@ -1191,7 +1194,8 @@ for (auto &I : FrameSamples) { for (auto *FunctionProfile : I.second) { for (auto *Probe : I.first->getProbes()) { - FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0); + FunctionProfile->addBodySamples(Probe->getIndex(), + Probe->getDiscriminator(), 0); } } } @@ -1214,8 +1218,9 @@ StringRef CalleeName = getCalleeNameForAddress(TargetAddress); if (CalleeName.size() == 0) continue; - 
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName, - Count); + FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), + CallProbe->getDiscriminator(), + CalleeName, Count); } }