This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
docs/
-
ReleaseNotes.rst
-
lib/Transforms/Scalar/
-
Transforms/
-
Scalar/
2/3
LoopIdiomRecognize.cpp
-
test/Transforms/LoopIdiom/
-
Transforms/
-
LoopIdiom/
-
bcmp-basic.ll
-
bcmp-debugify-remarks.ll
-
bcmp-widening.ll

Differential D61144

[LoopIdiomRecognize] BCmp loop idiom recognition
AbandonedPublic

Authored by lebedev.ri on Apr 25 2019, 1:22 PM.

Download Raw Diff

Details

Reviewers

reames
fhahn
mkazantsev
chandlerc
craig.topper
courbet

Commits

rG76cdcf25b883: [LoopIdiomRecognize] Recommit: BCmp loop idiom recognition
rL374662: [LoopIdiomRecognize] Recommit: BCmp loop idiom recognition
rG5c9f3cfec78f: [LoopIdiomRecognize] BCmp loop idiom recognition
rL370454: [LoopIdiomRecognize] BCmp loop idiom recognition

Summary

@mclow.lists brought this issue up in IRC.
It is a reasonably common problem to compare some two values for equality.
Those may be just some integers, strings or arrays of integers.

In C, there are the memcmp() and bcmp() functions.
In C++, there exists the std::equal() algorithm.
One can also write that function manually.

libstdc++'s std::equal() is specialized to directly call memcmp() for
various types, but not std::byte from C++2a. https://godbolt.org/z/mx2ejJ

libc++ does not do anything like that, it simply relies on simple C++'s
operator==(). https://godbolt.org/z/er0Zwf (GOOD!)

So there are likely some performance opportunities here.
Let's compare performance of naive std::equal() (no memcmp()) with one that
is using memcmp() (in this case, compiled with modified compiler).

llvm-bcmp-bench.cc3 KBDownload

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <random>
#include <type_traits>
#include <utility>
#include <vector>

#include "benchmark/benchmark.h"

// Naive element-wise equality check of [a, a_end) against the range that
// starts at b. Stops at the first mismatching pair; an empty first range
// compares equal.
template <class T>
bool equal(T* a, T* a_end, T* b) noexcept {
  while (a != a_end) {
    if (*a != *b) {
      return false;
    }
    ++a;
    ++b;
  }
  return true;
}

template <typename T>
std::vector<T> getVectorOfRandomNumbers(size_t count) {
  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_int_distribution<T> dis(std::numeric_limits<T>::min(),
                                       std::numeric_limits<T>::max());
  std::vector<T> v;
  v.reserve(count);
  std::generate_n(std::back_inserter(v), count,
                  [&dis, &gen]() { return dis(gen); });
  assert(v.size() == count);
  return v;
}

// Data generator that yields two buffers with identical contents.
struct Identical {
  // Builds one random `count`-element vector and returns it together with a
  // copy of itself: `.first` is the copy, `.second` is the original buffer.
  template <typename T>
  static std::pair<std::vector<T>, std::vector<T>> Gen(size_t count) {
    std::vector<T> Original = getVectorOfRandomNumbers<T>(count);
    std::vector<T> Duplicate = Original;
    return std::make_pair(std::move(Duplicate), std::move(Original));
  }
};

// Data generator that yields two buffers which are equal except for the
// element at index count / 2.
struct InequalHalfway {
  // Returns a random `count`-element vector and a copy of it whose middle
  // element has been incremented. For count == 0 both vectors are empty.
  template <typename T>
  static std::pair<std::vector<T>, std::vector<T>> Gen(size_t count) {
    auto V0 = getVectorOfRandomNumbers<T>(count);
    auto V1 = V0;
    // An empty buffer has no middle element; indexing it would be out of
    // bounds, so only perturb when there is something to perturb.
    if (!V1.empty()) {
      V1[V1.size() / size_t(2)]++;  // just change the value.
    }
    return std::make_pair(std::move(V0), std::move(V1));
  }
};

// Benchmark body: times equal() over two buffers of `state.range(0)` elements
// of type T produced by Gen, then publishes element-count and byte-count
// counters (absolute and per-second) for the run.
template <class T, class Gen>
void BM_bcmp(benchmark::State& state) {
  using Buffer = std::vector<T>;
  using benchmark::Counter;

  const size_t Length = state.range(0);

  const std::pair<Buffer, Buffer> Data = Gen::template Gen<T>(Length);
  const Buffer& a = Data.first;
  const Buffer& b = Data.second;
  assert(a.size() == Length && b.size() == a.size());

  // Keep the compiler from reasoning about the buffers' contents or
  // addresses before the timed loop starts.
  benchmark::ClobberMemory();
  benchmark::DoNotOptimize(a);
  benchmark::DoNotOptimize(a.data());
  benchmark::DoNotOptimize(b);
  benchmark::DoNotOptimize(b.data());

  for (auto _ : state) {
    const bool is_equal = equal(a.data(), a.data() + a.size(), b.data());
    benchmark::DoNotOptimize(is_equal);
  }

  state.SetComplexityN(Length);

  // Element throughput.
  state.counters["eltcnt"] = Counter(Length, Counter::kIsIterationInvariant);
  state.counters["eltcnt/sec"] =
      Counter(Length, Counter::kIsIterationInvariantRate);

  // Byte throughput: both buffers are counted in full, i.e. this is the
  // combined size of the two inputs rather than the bytes actually visited.
  const size_t BytesRead = 2 * sizeof(T) * Length;
  state.counters["bytes_read/iteration"] =
      Counter(BytesRead, Counter::kDefaults, Counter::OneK::kIs1024);
  state.counters["bytes_read/sec"] =
      Counter(BytesRead, Counter::kIsIterationInvariantRate,
              Counter::OneK::kIs1024);
}

// Configures benchmark `b` to sweep power-of-two element counts from 1 up to
// the largest count for which both input buffers of T still fit inside the
// L2 cache with a 2x safety margin.
template <typename T>
static void CustomArguments(benchmark::internal::Benchmark* b) {
  const size_t L2SizeBytes = []() -> size_t {
    // Assumed L2 size when the host reports none; without a fallback the
    // upper bound below would collapse to zero.
    constexpr size_t FallbackL2SizeBytes = 256 * 1024;
    for (const benchmark::CPUInfo::CacheInfo& I :
         benchmark::CPUInfo::Get().caches) {
      if (I.level == 2 && I.size > 0) return static_cast<size_t>(I.size);
    }
    return FallbackL2SizeBytes;
  }();
  // What is the largest range we can check to always fit within given L2 cache?
  // Clamp to at least 1 so that Range(1, MaxLen) is never an inverted range,
  // even for a tiny cache or a very large T.
  const size_t MaxLen =
      std::max<size_t>(1, L2SizeBytes / /*total bufs*/ 2 /
                              /*maximal elt size*/ sizeof(T) /
                              /*safety margin*/ 2);
  b->RangeMultiplier(2)->Range(1, MaxLen)->Complexity(benchmark::oN);
}

// Register BM_bcmp for every element width with buffers that compare equal,
// so the comparison has to walk the whole range.
BENCHMARK_TEMPLATE(BM_bcmp, uint8_t, Identical)
    ->Apply(CustomArguments<uint8_t>);
BENCHMARK_TEMPLATE(BM_bcmp, uint16_t, Identical)
    ->Apply(CustomArguments<uint16_t>);
BENCHMARK_TEMPLATE(BM_bcmp, uint32_t, Identical)
    ->Apply(CustomArguments<uint32_t>);
BENCHMARK_TEMPLATE(BM_bcmp, uint64_t, Identical)
    ->Apply(CustomArguments<uint64_t>);

// Same element widths, but with buffers that first differ at the middle
// element, so the comparison can stop halfway through.
BENCHMARK_TEMPLATE(BM_bcmp, uint8_t, InequalHalfway)
    ->Apply(CustomArguments<uint8_t>);
BENCHMARK_TEMPLATE(BM_bcmp, uint16_t, InequalHalfway)
    ->Apply(CustomArguments<uint16_t>);
BENCHMARK_TEMPLATE(BM_bcmp, uint32_t, InequalHalfway)
    ->Apply(CustomArguments<uint32_t>);
BENCHMARK_TEMPLATE(BM_bcmp, uint64_t, InequalHalfway)
    ->Apply(CustomArguments<uint64_t>);

llvm-bcmp-benchmark-results.txt73 KBDownload

$ ~/src/googlebenchmark/tools/compare.py --no-utest benchmarks build-{old,new}/test/llvm-bcmp-bench
RUNNING: build-old/test/llvm-bcmp-bench --benchmark_out=/tmp/tmpb6PEUx
2019-04-25 21:17:11
Running build-old/test/llvm-bcmp-bench
Run on (8 X 4000 MHz CPU s)
CPU Caches:
  L1 Data 16K (x8)
  L1 Instruction 64K (x4)
  L2 Unified 2048K (x4)
  L3 Unified 8192K (x1)
Load Average: 0.65, 3.90, 4.14
---------------------------------------------------------------------------------------------------
Benchmark                                         Time             CPU   Iterations UserCounters...
---------------------------------------------------------------------------------------------------
<...>
BM_bcmp<uint8_t, Identical>/512000           432131 ns       432101 ns         1613 bytes_read/iteration=1000k bytes_read/sec=2.20706G/s eltcnt=825.856M eltcnt/sec=1.18491G/s
BM_bcmp<uint8_t, Identical>_BigO               0.86 N          0.86 N
BM_bcmp<uint8_t, Identical>_RMS                   8 %             8 %
<...>
BM_bcmp<uint16_t, Identical>/256000          161408 ns       161409 ns         4027 bytes_read/iteration=1000k bytes_read/sec=5.90843G/s eltcnt=1030.91M eltcnt/sec=1.58603G/s
BM_bcmp<uint16_t, Identical>_BigO              0.67 N          0.67 N
BM_bcmp<uint16_t, Identical>_RMS                 25 %            25 %
<...>
BM_bcmp<uint32_t, Identical>/128000           81497 ns        81488 ns         8415 bytes_read/iteration=1000k bytes_read/sec=11.7032G/s eltcnt=1077.12M eltcnt/sec=1.57078G/s
BM_bcmp<uint32_t, Identical>_BigO              0.71 N          0.71 N
BM_bcmp<uint32_t, Identical>_RMS                 42 %            42 %
<...>
BM_bcmp<uint64_t, Identical>/64000            50138 ns        50138 ns        10909 bytes_read/iteration=1000k bytes_read/sec=19.0209G/s eltcnt=698.176M eltcnt/sec=1.27647G/s
BM_bcmp<uint64_t, Identical>_BigO              0.84 N          0.84 N
BM_bcmp<uint64_t, Identical>_RMS                 27 %            27 %
<...>
BM_bcmp<uint8_t, InequalHalfway>/512000      192405 ns       192392 ns         3638 bytes_read/iteration=1000k bytes_read/sec=4.95694G/s eltcnt=1.86266G eltcnt/sec=2.66124G/s
BM_bcmp<uint8_t, InequalHalfway>_BigO          0.38 N          0.38 N
BM_bcmp<uint8_t, InequalHalfway>_RMS              3 %             3 %
<...>
BM_bcmp<uint16_t, InequalHalfway>/256000     127858 ns       127860 ns         5477 bytes_read/iteration=1000k bytes_read/sec=7.45873G/s eltcnt=1.40211G eltcnt/sec=2.00219G/s
BM_bcmp<uint16_t, InequalHalfway>_BigO         0.50 N          0.50 N
BM_bcmp<uint16_t, InequalHalfway>_RMS             0 %             0 %
<...>
BM_bcmp<uint32_t, InequalHalfway>/128000      49140 ns        49140 ns        14281 bytes_read/iteration=1000k bytes_read/sec=19.4072G/s eltcnt=1.82797G eltcnt/sec=2.60478G/s
BM_bcmp<uint32_t, InequalHalfway>_BigO         0.40 N          0.40 N
BM_bcmp<uint32_t, InequalHalfway>_RMS            18 %            18 %
<...>
BM_bcmp<uint64_t, InequalHalfway>/64000       32101 ns        32099 ns        21786 bytes_read/iteration=1000k bytes_read/sec=29.7101G/s eltcnt=1.3943G eltcnt/sec=1.99381G/s
BM_bcmp<uint64_t, InequalHalfway>_BigO         0.50 N          0.50 N
BM_bcmp<uint64_t, InequalHalfway>_RMS             1 %             1 %
RUNNING: build-new/test/llvm-bcmp-bench --benchmark_out=/tmp/tmpQ46PP0
2019-04-25 21:19:29
Running build-new/test/llvm-bcmp-bench
Run on (8 X 4000 MHz CPU s)
CPU Caches:
  L1 Data 16K (x8)
  L1 Instruction 64K (x4)
  L2 Unified 2048K (x4)
  L3 Unified 8192K (x1)
Load Average: 1.01, 2.85, 3.71
---------------------------------------------------------------------------------------------------
Benchmark                                         Time             CPU   Iterations UserCounters...
---------------------------------------------------------------------------------------------------
<...>
BM_bcmp<uint8_t, Identical>/512000            18593 ns        18590 ns        37565 bytes_read/iteration=1000k bytes_read/sec=51.2991G/s eltcnt=19.2333G eltcnt/sec=27.541G/s
BM_bcmp<uint8_t, Identical>_BigO               0.04 N          0.04 N
BM_bcmp<uint8_t, Identical>_RMS                  37 %            37 %
<...>
BM_bcmp<uint16_t, Identical>/256000           18950 ns        18948 ns        37223 bytes_read/iteration=1000k bytes_read/sec=50.3324G/s eltcnt=9.52909G eltcnt/sec=13.511G/s
BM_bcmp<uint16_t, Identical>_BigO              0.08 N          0.08 N
BM_bcmp<uint16_t, Identical>_RMS                 34 %            34 %
<...>
BM_bcmp<uint32_t, Identical>/128000           18627 ns        18627 ns        37895 bytes_read/iteration=1000k bytes_read/sec=51.198G/s eltcnt=4.85056G eltcnt/sec=6.87168G/s
BM_bcmp<uint32_t, Identical>_BigO              0.16 N          0.16 N
BM_bcmp<uint32_t, Identical>_RMS                 35 %            35 %
<...>
BM_bcmp<uint64_t, Identical>/64000            18855 ns        18855 ns        37458 bytes_read/iteration=1000k bytes_read/sec=50.5791G/s eltcnt=2.39731G eltcnt/sec=3.3943G/s
BM_bcmp<uint64_t, Identical>_BigO              0.32 N          0.32 N
BM_bcmp<uint64_t, Identical>_RMS                 33 %            33 %
<...>
BM_bcmp<uint8_t, InequalHalfway>/512000        9570 ns         9569 ns        73500 bytes_read/iteration=1000k bytes_read/sec=99.6601G/s eltcnt=37.632G eltcnt/sec=53.5046G/s
BM_bcmp<uint8_t, InequalHalfway>_BigO          0.02 N          0.02 N
BM_bcmp<uint8_t, InequalHalfway>_RMS             29 %            29 %
<...>
BM_bcmp<uint16_t, InequalHalfway>/256000       9547 ns         9547 ns        74343 bytes_read/iteration=1000k bytes_read/sec=99.8971G/s eltcnt=19.0318G eltcnt/sec=26.8159G/s
BM_bcmp<uint16_t, InequalHalfway>_BigO         0.04 N          0.04 N
BM_bcmp<uint16_t, InequalHalfway>_RMS            29 %            29 %
<...>
BM_bcmp<uint32_t, InequalHalfway>/128000       9396 ns         9394 ns        73521 bytes_read/iteration=1000k bytes_read/sec=101.518G/s eltcnt=9.41069G eltcnt/sec=13.6255G/s
BM_bcmp<uint32_t, InequalHalfway>_BigO         0.08 N          0.08 N
BM_bcmp<uint32_t, InequalHalfway>_RMS            30 %            30 %
<...>
BM_bcmp<uint64_t, InequalHalfway>/64000        9499 ns         9498 ns        73802 bytes_read/iteration=1000k bytes_read/sec=100.405G/s eltcnt=4.72333G eltcnt/sec=6.73808G/s
BM_bcmp<uint64_t, InequalHalfway>_BigO         0.16 N          0.16 N
BM_bcmp<uint64_t, InequalHalfway>_RMS            28 %            28 %
Comparing build-old/test/llvm-bcmp-bench to build-new/test/llvm-bcmp-bench
Benchmark                                                  Time             CPU      Time Old      Time New       CPU Old       CPU New
---------------------------------------------------------------------------------------------------------------------------------------
<...>
BM_bcmp<uint8_t, Identical>/512000                      -0.9570         -0.9570        432131         18593        432101         18590
<...>
BM_bcmp<uint16_t, Identical>/256000                     -0.8826         -0.8826        161408         18950        161409         18948
<...>
BM_bcmp<uint32_t, Identical>/128000                     -0.7714         -0.7714         81497         18627         81488         18627
<...>
BM_bcmp<uint64_t, Identical>/64000                      -0.6239         -0.6239         50138         18855         50138         18855
<...>
BM_bcmp<uint8_t, InequalHalfway>/512000                 -0.9503         -0.9503        192405          9570        192392          9569
<...>
BM_bcmp<uint16_t, InequalHalfway>/256000                -0.9253         -0.9253        127858          9547        127860          9547
<...>
BM_bcmp<uint32_t, InequalHalfway>/128000                -0.8088         -0.8088         49140          9396         49140          9394
<...>
BM_bcmp<uint64_t, InequalHalfway>/64000                 -0.7041         -0.7041         32101          9499         32099          9498

What can we tell from the benchmark?

Performance of naive equality check somewhat improves with element size, maxing out at eltcnt/sec=1.58603G/s for uint16_t, or bytes_read/sec=19.0209G/s for uint64_t. I think, that instability implies performance problems.
Performance of memcmp()-aware benchmark always maxes out at around bytes_read/sec=51.2991G/s for every type. That is ~2.7x the throughput of the naive variant!
eltcnt/sec metric for the memcmp()-aware benchmark maxes out at eltcnt/sec=27.541G/s for uint8_t (was: eltcnt/sec=1.18491G/s, so ~23x) and linearly decreases with element size. For uint64_t, it's ~2.7x the elements/second.
The call is obviously more expensive than the loop for small element counts. As can be seen from the full output
llvm-bcmp-benchmark-results.txt73 KBDownload
, the memcmp() is almost universally worse, independent of the element size (and thus buffer size) when element count is less than 8.

So all in all, bcmp idiom does indeed pose untapped performance headroom.
This diff does implement said idiom recognition. I think a reasonable test
coverage is present, but do tell if there is anything obvious missing.

Now, quality. This does succeed to build and pass the test-suite, at least
without any non-bundled elements.

result-old.json3 MBDownload

result-new.json3 MBDownload

This transform fires 91 times:

$ /build/test-suite/utils/compare.py -m loop-idiom.NumBCmp result-new.json
Tests: 1149
Metric: loop-idiom.NumBCmp

Program                                         result-new

MultiSourc...Benchmarks/7zip/7zip-benchmark    79.00
MultiSource/Applications/d/make_dparser         3.00
SingleSource/UnitTests/vla                      2.00
MultiSource/Applications/Burg/burg              1.00
MultiSourc.../Applications/JM/lencod/lencod     1.00
MultiSource/Applications/lemon/lemon            1.00
MultiSource/Benchmarks/Bullet/bullet            1.00
MultiSourc...e/Benchmarks/MallocBench/gs/gs     1.00
MultiSourc...gs-C/TimberWolfMC/timberwolfmc     1.00
MultiSourc...Prolangs-C/simulator/simulator     1.00

The size changes are:
I'm not sure what's going on with SingleSource/UnitTests/vla.test yet, did not look.

$ /build/test-suite/utils/compare.py -m size..text result-{old,new}.json --filter-hash
Tests: 1149
Same hash: 907 (filtered out)
Remaining: 242
Metric: size..text

Program                                        result-old result-new diff
test-suite...ingleSource/UnitTests/vla.test   753.00     833.00     10.6%
test-suite...marks/7zip/7zip-benchmark.test   1001697.00 966657.00  -3.5%
test-suite...ngs-C/simulator/simulator.test   32369.00   32321.00   -0.1%
test-suite...plications/d/make_dparser.test   89585.00   89505.00   -0.1%
test-suite...ce/Applications/Burg/burg.test   40817.00   40785.00   -0.1%
test-suite.../Applications/lemon/lemon.test   47281.00   47249.00   -0.1%
test-suite...TimberWolfMC/timberwolfmc.test   250065.00  250113.00   0.0%
test-suite...chmarks/MallocBench/gs/gs.test   149889.00  149873.00  -0.0%
test-suite...ications/JM/lencod/lencod.test   769585.00  769569.00  -0.0%
test-suite.../Benchmarks/Bullet/bullet.test   770049.00  770049.00   0.0%
test-suite...HMARK_ANISTROPIC_DIFFUSION/128    NaN        NaN        nan%
test-suite...HMARK_ANISTROPIC_DIFFUSION/256    NaN        NaN        nan%
test-suite...CHMARK_ANISTROPIC_DIFFUSION/64    NaN        NaN        nan%
test-suite...CHMARK_ANISTROPIC_DIFFUSION/32    NaN        NaN        nan%
test-suite...ENCHMARK_BILATERAL_FILTER/64/4    NaN        NaN        nan%
Geomean difference                                                   nan%
         result-old    result-new       diff
count  1.000000e+01  10.00000      10.000000
mean   3.152090e+05  311695.40000  0.006749
std    3.790398e+05  372091.42232  0.036605
min    7.530000e+02  833.00000    -0.034981
25%    4.243300e+04  42401.00000  -0.000866
50%    1.197370e+05  119689.00000 -0.000392
75%    6.397050e+05  639705.00000 -0.000005
max    1.001697e+06  966657.00000  0.106242

I don't have timings though.

And now to the code. The basic idea is to completely replace the whole loop.
If we can't fully kill it, don't transform.
I have left one or two comments in the code, so hopefully it can be understood.

Also, there are a few TODOs that I have left for follow-ups:

widening of memcmp()/bcmp()
step smaller than the comparison size
Metadata propagation
more than two blocks as long as there is still a single backedge?
???

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

lebedev.ri created this revision.Apr 25 2019, 1:22 PM

Herald added a subscriber: jfb. · View Herald TranscriptApr 25 2019, 1:22 PM

lebedev.ri added parent revisions: D61013: [NFC] BasicBlock: refactor changePhiUses() out of replacePhiUsesWith(), use it, D61011: [NFC] PHINode: introduce replaceIncomingBlockWith() function, use it, D61010: [NFC] Instruction: introduce replaceSuccessorWith() function, use it, D61007: [NFC] SCEVExpander: add SetCurrentDebugLocation() / getCurrentDebugLocation() wrappers, D61008: [NFC][Utils] deleteDeadLoop(): add an assert that exit block has some non-PHI instruction.Apr 25 2019, 1:22 PM

Rebased, ping, any thoughts, no matter how high-level?

ping

Initial set of changes to reduce patch complexity. Note that I've skipped most of the interesting logic and will not review it until unrelated pieces have been split apart.

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
147 ↗	(On Diff #198225)	The addition of ORE appears to be a separable change which causes test diffs of its own. Please split and review separately.
376 ↗	(On Diff #198225)	Unrelated changes, please split and land. (Applies for almost all LLVM_DEBUG usage.)
1853 ↗	(On Diff #198225)	Nope, remove.
1976 ↗	(On Diff #198225)	You don't appear to be bailing out for atomic or volatile loads.
1989 ↗	(On Diff #198225)	Just mod 8. LLVM does not support bytes of other size.
2127 ↗	(On Diff #198225)	Actually, you do handle volatile. Move this closer to the def for readability would you?
test/Transforms/LoopIdiom/bcmp-basic.ll
2 ↗	(On Diff #198225)	Unrelated test change. Remove please.

This revision now requires changes to proceed.May 28 2019, 4:14 PM

Diffusion mentioned this in rL362022: [LoopIdiomRecognize][NFC] Use DEBUG_TYPE, add LLVM_DEBUG() to….May 29 2019, 1:09 PM

lebedev.ri mentioned this in rG95dec50a35da: [LoopIdiomRecognize][NFC] Use DEBUG_TYPE, add LLVM_DEBUG() to….May 29 2019, 1:10 PM

lebedev.ri mentioned this in D62631: [LoopIdiom] Basic OptimizationRemarkEmitter handling.May 29 2019, 2:40 PM

Thank you for taking a look!
I believe i have addressed review notes.

lebedev.ri added a parent revision: D62631: [LoopIdiom] Basic OptimizationRemarkEmitter handling.May 29 2019, 2:53 PM

lebedev.ri added inline comments.

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
1989 ↗	(On Diff #198225)	Is this sufficiently-obscure, or shall i inline the variable, too?
2127 ↗	(On Diff #198225)	The important difference is - if i bailout at the very beginning, no further analysis will be done. But if i bailout here, then if this fires then one who sees this diagnostic may know that the issue here isn't the volatile loads themselves, but rather the missing codegen. As in, all other legality checks have passed here. With that info, the comment still stands?
test/Transforms/LoopIdiom/bcmp-basic.ll
2 ↗	(On Diff #198225)	How is this unrelated? This is one of the dedicated test files for this diff, and that change is showing that the patch does not anger any of these verifiers.

Use ore::NV

Diffusion mentioned this in rL362091: [LoopIdiomRecognize][NFC] Sort includes.May 30 2019, 6:03 AM

Diffusion mentioned this in rL362092: [LoopIdiom] Basic OptimizationRemarkEmitter handling.

lebedev.ri mentioned this in rGfae2e46766c4: [LoopIdiomRecognize][NFC] Sort includes.May 30 2019, 6:03 AM

lebedev.ri mentioned this in rGe8578953ac6c: [LoopIdiom] Basic OptimizationRemarkEmitter handling.

(Review interrupted, a few useful comments, more to come)

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
1989 ↗	(On Diff #198225)	Looks fine.
2127 ↗	(On Diff #198225)	Really not sure what that gives the user? If the loop contains a volatile or ordered load, we're done. The transform isn't going to run, no matter what other legality predicates are met.
1924 ↗	(On Diff #202070)	Using m_CombineAnd like this is correct, but I found it quite confusing. You might want to consider pulling out a helper function and using early return matching instead. e.g. struct MatchedBCmpCompairsonState {...}; bool matchBCmpComparison(Instruction *Term, MatchState&)
2020 ↗	(On Diff #202070)	If the loop has unique exits, then each phi must have a single predecessor which must be in the loop. LoopSimplify creates unique exits, so I'd just bail on the non-unique case.
2036 ↗	(On Diff #202070)	Given the base of each object and the length, you can form a MemoryLocation for the entire portion being read from both arrays. Given that, a simple alias query can check for modification or ordering. MemoryLocation LocA(Src1, Size); for (Instruction &I : instructions(L)) if (I aliases LocA) return false; Note that if your length is non-constant, this ends up being quite conservative, but so does your current code. See also: mayLoopAccessLocation
test/Transforms/LoopIdiom/bcmp-basic.ll
2 ↗	(On Diff #198225)	Presumably it didn't anger any of the verifiers before, so the change can be checked in and doesn't need to be part of this diff.
test/Transforms/LoopIdiom/bcmp-debugify-remarks.ll
2 ↗	(On Diff #202070)	Same here.
test/Transforms/LoopIdiom/bcmp-widening.ll
2 ↗	(On Diff #202070)	Same here.

Thank you for taking a look!

I think it is unproductive for me to reply/fix until there is a review with the entire change in mind, not parts of it.

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
2127 ↗	(On Diff #198225)	(note the `OptimizationRemarkMissed` and the `LLVM_DEBUG` printout?)
1924 ↗	(On Diff #202070)	I'm sorry, i do not understand what you are saying here. This for sure will need refactoring when adding memcmp support, but until then i'm not yet seeing how to rewrite this while improving readability at the same time.
2020 ↗	(On Diff #202070)	Similarly, i do not understand what you are saying here. You have read the comment just before these loops? // No loop instructions must be used outside of the loop. It doesn't matter whether we are in LCSSA form or not (we are), whether we have dedicated exits or not (we do), whether we have unique exits or not (that's the blocks we are checking here, so we do?). The check is specifically that no instructions "other than the latches" are used outside of the loop. As in, that we can fully erase the entire loop and replace it with a single `bcmp` call. It's great the LCSSA form allows us to only check the PHI nodes, but i'm not sure how else the check i'm looking for can be performed. This is tested with `@loop_instruction_used_in_phi_node_outside_loop` in `bcmp-negative-tests.ll`.
2036 ↗	(On Diff #202070)	I think i should wait until there is a full review.. Similarly, i'm not sure what this is about. The end goal here is completely zap the entire old loop, to fully delete it out of existence, replace with a single `bcmp` call. If the loop has any other side-effects aside from those you'd get if you inline `bcmp`, then the loop can not be transformed.
test/Transforms/LoopIdiom/bcmp-basic.ll
2 ↗	(On Diff #198225)	Presumably it didn't anger any of the verifiiers before But that is precisely the point here, those options aren't free, why have them in-tree for who knows how long (until this change lands, which is far from given) until then?

nikic added a subscriber: nikic.Jun 5 2019, 2:02 PM

nikic added inline comments.

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
2129 ↗	(On Diff #202070)	Is this kind of reasoning legal on the level of LLVM IR? Especially if no inbounds GEPs are involved, aren't we just dealing in raw memory and there isn't necessarily any object with representable size involved? (Context: Wondering about this in https://reviews.llvm.org/D61934#inline-559489 and your comment seems like a plausible explanation.)

lebedev.ri marked an inline comment as done.Jun 5 2019, 2:25 PM

lebedev.ri added inline comments.

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
2129 ↗	(On Diff #202070)	I don't have an answer. The obvious alternative would be to do saturating multiplication, i guess?

This seems interesting!

Since @courbet did some work with bcmp, I added him as a reviewer.

Yeah, based on reviews/time on my patches, loop-related reviews seem most congested.

courbet added inline comments.Aug 22 2019, 2:24 PM

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
1924 ↗	(On Diff #202070)	I think what was meant is: If you refactor the matches to separate functions, the control flow for this function becomes clearer: struct CmpLoopStructure { Value BCmpValue, LatchCmpValue; BasicBlock HeaderBrEqualBB, HeaderBrUnequalBB; BasicBlock LatchBrFinishBB, LatchBrContinueBB; }; CmpLoopStructure CmpLoop; if (!matchCmpLoopStructure(LoopHeaderBB, CmpLoop)) { return false; } struct CmpOfLoads { ICmpInst::Predicate BCmpPred; Value LoadSrcA, LoadSrcB; Value LoadA_, LoadB_; } if (!matchCmpOfLoads(BcmpLoop.BcmpValue, CmpOfLoads)) { return false; }
1947 ↗	(On Diff #202070)	I don't find the comment very clear. Maybe rephrase as: "FIXME: memcmp()/bcmp() calls have the same semantics as icmp. Match them too."
1953 ↗	(On Diff #202070)	This is redundant with the previous check. Do you want to remove it from the previous one (this will make handling `memcmp()` easier in the future).
1961 ↗	(On Diff #202070)	Typo: "Be vary"
1986 ↗	(On Diff #202070)	If you refactor as mentioned above, you can do the casts in the subfunctions. That being said I don't think the framework allows the cast during the match, which would be cool.
2129 ↗	(On Diff #202070)	Saturating multiplication would actually change the semantics of the code. The memset loop idiom already has this issue actually. I don't think the explanation is convincing because we could indeed be loading from raw memory. I guess the only thing that is saving us right now is that it's unlikely that people are memset/memcmp-ing more than size_t-max bytes of memory in real life. But the optimization is buggy :(

@courbet thank you for taking a look!

Rebased, chopped LoopIdiomRecognize::detectBCmpIdiom() into several smaller functions.

Herald added a subscriber: hiraditya. · View Herald TranscriptAug 23 2019, 1:51 AM

lebedev.ri added inline comments.Aug 23 2019, 1:51 AM

lib/Transforms/Scalar/LoopIdiomRecognize.cpp
1986 ↗	(On Diff #202070)	Indeed, you can't cast during patternmatch, this seems like the best place for this.
2129 ↗	(On Diff #202070)	Added FIXME for now.

LGTM. Maybe let's wait a couple days to see if @reames has blocking issues ?

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
228	Is there any reason for the trailing `_` ?
2579	rm

lebedev.ri added inline comments.Aug 23 2019, 10:20 AM

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
228	Right. No reason now that they are in a struct. I'll drop trailing `_`.

Dropped unneeded _ from member variable names.

lebedev.ri mentioned this in D61007: [NFC] SCEVExpander: add SetCurrentDebugLocation() / getCurrentDebugLocation() wrappers.Aug 24 2019, 10:07 AM

In D61144#1643152, @courbet wrote:

LGTM. Maybe let's wait a couple days to see if @reames has blocking issues ?

@reames please could you clarify whether you have blocking issues here?

Any other loop folk want to review? :)

FWIW i personally don't expect this to have any issues, the legality check
is really (too?) strict and the rewriting part is doing the right thing
in all the edge-cases i have stumbled into in test-suite.

On Wed, Aug 28, 2019 at 11:44 PM Philip Reames <> wrote:

I have not been following the thread recently; you do not need to wait
for me if someone else has reviewed the code.

Philip

Well, okay then.
Reviews never catch everything anyways.

Proceeding to landing.

This revision was not accepted when it landed; it landed in state Needs Review.Aug 30 2019, 2:56 AM

Closed by commit rL370454: [LoopIdiomRecognize] BCmp loop idiom recognition (authored by lebedevri). · Explain Why

This revision was automatically updated to reflect the committed changes.

Could you please look into https://bugs.llvm.org/show_bug.cgi?id=43206 ?

Reverted in rL370788 due to https://bugs.llvm.org/show_bug.cgi?id=43206, will investigate...

This revision was not accepted when it landed; it landed in state Needs Review.Oct 12 2019, 8:37 AM

Closed by commit rG76cdcf25b883: [LoopIdiomRecognize] Recommit: BCmp loop idiom recognition (authored by lebedev.ri). · Explain Why

This revision was automatically updated to reflect the committed changes.

I found a strange sanitizer error as a result of this revision, could you take a quick look at https://bugs.llvm.org/show_bug.cgi?id=43870?

Reverted in rGc4b757be026150eee32050e120026b03d92eb421.

lebedev.ri abandoned this revision.Nov 2 2019, 2:50 AM

Since this was reverted I am wondering..

libstdc++'s std::equal() is specialized to directly call memcmp() for various types.

Is this legal? If yes, libcxx should do it too.

Revision Contents

Path

Size

llvm/

docs/

ReleaseNotes.rst

3 lines

lib/

Transforms/

Scalar/

LoopIdiomRecognize.cpp

878 lines

test/

Transforms/

LoopIdiom/

bcmp-basic.ll

881 lines

bcmp-debugify-remarks.ll

119 lines

bcmp-widening.ll

2 lines

Diff 224747

llvm/docs/ReleaseNotes.rst

Show First 20 Lines • Show All 60 Lines • ▼ Show 20 Lines	* As per :ref:`LLVM Language Reference Manual <i_getelementptr>`,
Since `r369789 <https://reviews.llvm.org/rL369789>`_		Since `r369789 <https://reviews.llvm.org/rL369789>`_
(`D66608 <https://reviews.llvm.org/D66608>`_ ``[InstCombine] icmp eq/ne (gep		(`D66608 <https://reviews.llvm.org/D66608>`_ ``[InstCombine] icmp eq/ne (gep
inbounds P, Idx..), null -> icmp eq/ne P, null``) LLVM uses that for		inbounds P, Idx..), null -> icmp eq/ne P, null``) LLVM uses that for
transformations. If the original source violates these requirements this		transformations. If the original source violates these requirements this
may result in code being miscompiled. If you are using Clang front-end,		may result in code being miscompiled. If you are using Clang front-end,
Undefined Behaviour Sanitizer ``-fsanitize=pointer-overflow`` check		Undefined Behaviour Sanitizer ``-fsanitize=pointer-overflow`` check
will now catch such cases.		will now catch such cases.

		* The Loop Idiom Recognition (``-loop-idiom``) pass has learned to recognize
		``bcmp`` pattern, and convert it into a call to ``bcmp`` (or ``memcmp``)
		function.

Changes to the LLVM IR		Changes to the LLVM IR
----------------------		----------------------

* Unnamed function arguments now get printed with their automatically		* Unnamed function arguments now get printed with their automatically
generated name (e.g. "i32 %0") in definitions. This may require front-ends		generated name (e.g. "i32 %0") in definitions. This may require front-ends
to update their tests; if so there is a script utils/add_argument_names.py		to update their tests; if so there is a script utils/add_argument_names.py
that correctly converted 80-90% of Clang tests. Some manual work will almost		that correctly converted 80-90% of Clang tests. Some manual work will almost
▲ Show 20 Lines • Show All 92 Lines • Show Last 20 Lines

llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp

Show All 35 Lines
//		//
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"		#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/ADT/APInt.h"		#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"		#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"		#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"		#include "llvm/ADT/MapVector.h"
		#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"		#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"		#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"		#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"		#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"		#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"		#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"		#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"		#include "llvm/Analysis/LoopInfo.h"
Show All 20 Lines
#include "llvm/IR/InstrTypes.h"		#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"		#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"		#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"		#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"		#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"		#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"		#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"		#include "llvm/IR/PassManager.h"
		#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"		#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"		#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"		#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"		#include "llvm/IR/ValueHandle.h"
		#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"		#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"		#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"		#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"		#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"		#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"		#include "llvm/Transforms/Scalar.h"
		#include "llvm/Transforms/Scalar/LoopPassManager.h"
		#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"		#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"		#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"		#include "llvm/Transforms/Utils/LoopUtils.h"
#include <algorithm>		#include <algorithm>
#include <cassert>		#include <cassert>
#include <cstdint>		#include <cstdint>
#include <utility>		#include <utility>
#include <vector>		#include <vector>

using namespace llvm;		using namespace llvm;

#define DEBUG_TYPE "loop-idiom"		#define DEBUG_TYPE "loop-idiom"

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");		STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");		STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
		STATISTIC(NumBCmp, "Number of memcmp's formed from loop 2xload+eq-compare");

static cl::opt<bool> UseLIRCodeSizeHeurs(		static cl::opt<bool> UseLIRCodeSizeHeurs(
"use-lir-code-size-heurs",		"use-lir-code-size-heurs",
cl::desc("Use loop idiom recognition code size heuristics when compiling"		cl::desc("Use loop idiom recognition code size heuristics when compiling"
"with -Os/-Oz"),		"with -Os/-Oz"),
cl::init(true), cl::Hidden);		cl::init(true), cl::Hidden);

namespace {		namespace {

		// FIXME: reinventing the wheel much? Is there a cleaner solution?
		// Interface with a single operation, markLoopAsDeleted(), that is
		// implemented separately for the legacy and the new pass manager.
		struct PMAbstraction {
		virtual void markLoopAsDeleted(Loop *L) = 0;
		virtual ~PMAbstraction() = default;
		};
		struct LegacyPMAbstraction : PMAbstraction {
		LPPassManager &LPM;
		LegacyPMAbstraction(LPPassManager &LPM) : LPM(LPM) {}
		virtual ~LegacyPMAbstraction() = default;
		void markLoopAsDeleted(Loop L) override { LPM.markLoopAsDeleted(L); }
		};
		// PMAbstraction implementation that forwards to an LPMUpdater.
		struct NewPMAbstraction : PMAbstraction {
		LPMUpdater &Updater;
		NewPMAbstraction(LPMUpdater &Updater) : Updater(Updater) {}
		virtual ~NewPMAbstraction() = default;
		// Hands the loop to the updater by reference, together with its name.
		void markLoopAsDeleted(Loop *L) override {
		Updater.markLoopAsDeleted(*L, L->getName());
		}
		};

class LoopIdiomRecognize {		class LoopIdiomRecognize {
Loop *CurLoop = nullptr;		Loop *CurLoop = nullptr;
AliasAnalysis *AA;		AliasAnalysis *AA;
DominatorTree *DT;		DominatorTree *DT;
LoopInfo *LI;		LoopInfo *LI;
ScalarEvolution *SE;		ScalarEvolution *SE;
TargetLibraryInfo *TLI;		TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;		const TargetTransformInfo *TTI;
const DataLayout *DL;		const DataLayout *DL;
		PMAbstraction &LoopDeleter;
OptimizationRemarkEmitter &ORE;		OptimizationRemarkEmitter &ORE;
bool ApplyCodeSizeHeuristics;		bool ApplyCodeSizeHeuristics;

public:		public:
explicit LoopIdiomRecognize(AliasAnalysis AA, DominatorTree DT,		explicit LoopIdiomRecognize(AliasAnalysis AA, DominatorTree DT,
LoopInfo LI, ScalarEvolution SE,		LoopInfo LI, ScalarEvolution SE,
TargetLibraryInfo *TLI,		TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI,		const TargetTransformInfo *TTI,
const DataLayout *DL,		const DataLayout *DL, PMAbstraction &LoopDeleter,
OptimizationRemarkEmitter &ORE)		OptimizationRemarkEmitter &ORE)
: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL), ORE(ORE) {}		: AA(AA), DT(DT), LI(LI), SE(SE), TLI(TLI), TTI(TTI), DL(DL),
		LoopDeleter(LoopDeleter), ORE(ORE) {}

bool runOnLoop(Loop *L);		bool runOnLoop(Loop *L);

private:		private:
using StoreList = SmallVector<StoreInst *, 8>;		using StoreList = SmallVector<StoreInst *, 8>;
using StoreListMap = MapVector<Value *, StoreList>;		using StoreListMap = MapVector<Value *, StoreList>;

StoreListMap StoreRefsForMemset;		StoreListMap StoreRefsForMemset;
StoreListMap StoreRefsForMemsetPattern;		StoreListMap StoreRefsForMemsetPattern;
StoreList StoreRefsForMemcpy;		StoreList StoreRefsForMemcpy;
bool HasMemset;		bool HasMemset;
bool HasMemsetPattern;		bool HasMemsetPattern;
bool HasMemcpy;		bool HasMemcpy;
		bool HasMemCmp;
		bool HasBCmp;

/// Return code for isLegalStore()		/// Return code for isLegalStore()
enum LegalStoreKind {		enum LegalStoreKind {
None = 0,		None = 0,
Memset,		Memset,
MemsetPattern,		MemsetPattern,
Memcpy,		Memcpy,
UnorderedAtomicMemcpy,		UnorderedAtomicMemcpy,
Show All 26 Lines	bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
bool IsLoopMemset = false);		bool IsLoopMemset = false);

/// @}		/// @}
/// \name Noncountable Loop Idiom Handling		/// \name Noncountable Loop Idiom Handling
/// @{		/// @{

bool runOnNoncountableLoop();		bool runOnNoncountableLoop();

		struct CmpLoopStructure {
		Value BCmpValue, LatchCmpValue;
		BasicBlock HeaderBrEqualBB, HeaderBrUnequalBB;
		BasicBlock LatchBrFinishBB, LatchBrContinueBB;
		};
		bool matchBCmpLoopStructure(CmpLoopStructure &CmpLoop) const;
		struct CmpOfLoads {
		ICmpInst::Predicate BCmpPred;
		Value LoadSrcA, LoadSrcB;
		Value LoadA, LoadB;
		courbetUnsubmitted Done Reply Inline Actions Is there any reason for the trailing `_` ? courbet: Is there any reason for the trailing `_` ?
		lebedev.riAuthorUnsubmitted Done Reply Inline Actions Right. No reason now that they are in a struct. I'll drop trailing `_`. lebedev.ri: Right. No reason now that they are in a struct. I'll drop trailing `_`.
		};
		bool matchBCmpOfLoads(Value *BCmpValue, CmpOfLoads &CmpOfLoads) const;
		bool recognizeBCmpLoopControlFlow(const CmpOfLoads &CmpOfLoads,
		CmpLoopStructure &CmpLoop) const;
		bool recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes, CmpOfLoads &CmpOfLoads,
		const SCEV &SrcA, const SCEV &SrcB,
		const SCEV *&Iterations) const;
		bool detectBCmpIdiom(ICmpInst &BCmpInst, CmpInst &LatchCmpInst,
		LoadInst &LoadA, LoadInst &LoadB, const SCEV *&SrcA,
		const SCEV &SrcB, const SCEV &NBytes) const;
		BasicBlock transformBCmpControlFlow(ICmpInst ComparedEqual);
		void transformLoopToBCmp(ICmpInst BCmpInst, CmpInst LatchCmpInst,
		LoadInst LoadA, LoadInst LoadB, const SCEV *SrcA,
		const SCEV SrcB, const SCEV NBytes);
		bool recognizeBCmp();

bool recognizePopcount();		bool recognizePopcount();
void transformLoopToPopcount(BasicBlock PreCondBB, Instruction CntInst,		void transformLoopToPopcount(BasicBlock PreCondBB, Instruction CntInst,
PHINode CntPhi, Value Var);		PHINode CntPhi, Value Var);
bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz		bool recognizeAndInsertFFS(); /// Find First Set: ctlz or cttz
void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,		void transformLoopToCountable(Intrinsic::ID IntrinID, BasicBlock *PreCondBB,
Instruction CntInst, PHINode CntPhi,		Instruction CntInst, PHINode CntPhi,
Value Var, Instruction DefX,		Value Var, Instruction DefX,
const DebugLoc &DL, bool ZeroCheck,		const DebugLoc &DL, bool ZeroCheck,
Show All 21 Lines	bool runOnLoop(Loop *L, LPPassManager &LPM) override {
ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();		ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TargetLibraryInfo *TLI =		TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(		&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
*L->getHeader()->getParent());		*L->getHeader()->getParent());
const TargetTransformInfo *TTI =		const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(		&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
*L->getHeader()->getParent());		*L->getHeader()->getParent());
const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();		const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
		LegacyPMAbstraction LoopDeleter(LPM);

// For the old PM, we can't use OptimizationRemarkEmitter as an analysis		// For the old PM, we can't use OptimizationRemarkEmitter as an analysis
// pass. Function analyses need to be preserved across loop transformations		// pass. Function analyses need to be preserved across loop transformations
// but ORE cannot be preserved (see comment before the pass definition).		// but ORE cannot be preserved (see comment before the pass definition).
OptimizationRemarkEmitter ORE(L->getHeader()->getParent());		OptimizationRemarkEmitter ORE(L->getHeader()->getParent());

LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, ORE);		LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, DL, LoopDeleter, ORE);
return LIR.runOnLoop(L);		return LIR.runOnLoop(L);
}		}

/// This transformation requires natural loop information & requires that		/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.		/// loop preheaders be inserted into the CFG.
void getAnalysisUsage(AnalysisUsage &AU) const override {		void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();		AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();		AU.addRequired<TargetTransformInfoWrapperPass>();
getLoopAnalysisUsage(AU);		getLoopAnalysisUsage(AU);
}		}
};		};

} // end anonymous namespace		} // end anonymous namespace

char LoopIdiomRecognizeLegacyPass::ID = 0;		char LoopIdiomRecognizeLegacyPass::ID = 0;

PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,		PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,		LoopStandardAnalysisResults &AR,
LPMUpdater &) {		LPMUpdater &Updater) {
const auto *DL = &L.getHeader()->getModule()->getDataLayout();		const auto *DL = &L.getHeader()->getModule()->getDataLayout();

const auto &FAM =		const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();		AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
Function *F = L.getHeader()->getParent();		Function *F = L.getHeader()->getParent();

auto ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);		auto ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(F);
// FIXME: This should probably be optional rather than required.		// FIXME: This should probably be optional rather than required.
if (!ORE)		if (!ORE)
report_fatal_error(		report_fatal_error(
"LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "		"LoopIdiomRecognizePass: OptimizationRemarkEmitterAnalysis not cached "
"at a higher level");		"at a higher level");

		NewPMAbstraction LoopDeleter(Updater);
LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,		LoopIdiomRecognize LIR(&AR.AA, &AR.DT, &AR.LI, &AR.SE, &AR.TLI, &AR.TTI, DL,
*ORE);		LoopDeleter, *ORE);
if (!LIR.runOnLoop(&L))		if (!LIR.runOnLoop(&L))
return PreservedAnalyses::all();		return PreservedAnalyses::all();

return getLoopPassPreservedAnalyses();		return getLoopPassPreservedAnalyses();
}		}

INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",		INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
"Recognize loop idioms", false, false)		"Recognize loop idioms", false, false)
Show All 20 Lines	bool LoopIdiomRecognize::runOnLoop(Loop *L) {
CurLoop = L;		CurLoop = L;
// If the loop could not be converted to canonical form, it must have an		// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.		// indirectbr in it, just give up.
if (!L->getLoopPreheader())		if (!L->getLoopPreheader())
return false;		return false;

// Disable loop idiom recognition if the function's name is a common idiom.		// Disable loop idiom recognition if the function's name is a common idiom.
StringRef Name = L->getHeader()->getParent()->getName();		StringRef Name = L->getHeader()->getParent()->getName();
if (Name == "memset" \|\| Name == "memcpy")		if (Name == "memset" \|\| Name == "memcpy" \|\| Name == "memcmp" \|\|
		Name == "bcmp")
return false;		return false;

// Determine if code size heuristics need to be applied.		// Determine if code size heuristics need to be applied.
ApplyCodeSizeHeuristics =		ApplyCodeSizeHeuristics =
L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;		L->getHeader()->getParent()->hasOptSize() && UseLIRCodeSizeHeurs;

HasMemset = TLI->has(LibFunc_memset);		HasMemset = TLI->has(LibFunc_memset);
HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);		HasMemsetPattern = TLI->has(LibFunc_memset_pattern16);
HasMemcpy = TLI->has(LibFunc_memcpy);		HasMemcpy = TLI->has(LibFunc_memcpy);
		HasMemCmp = TLI->has(LibFunc_memcmp);
		HasBCmp = TLI->has(LibFunc_bcmp);

if (HasMemset \|\| HasMemsetPattern \|\| HasMemcpy)		if (HasMemset \|\| HasMemsetPattern \|\| HasMemcpy \|\| HasMemCmp \|\| HasBCmp)
if (SE->hasLoopInvariantBackedgeTakenCount(L))		if (SE->hasLoopInvariantBackedgeTakenCount(L))
return runOnCountableLoop();		return runOnCountableLoop();

return runOnNoncountableLoop();		return runOnNoncountableLoop();
}		}

bool LoopIdiomRecognize::runOnCountableLoop() {		bool LoopIdiomRecognize::runOnCountableLoop() {
const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);		const SCEV *BECount = SE->getBackedgeTakenCount(CurLoop);
▲ Show 20 Lines • Show All 822 Lines • ▼ Show 20 Lines
}		}

bool LoopIdiomRecognize::runOnNoncountableLoop() {		bool LoopIdiomRecognize::runOnNoncountableLoop() {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["		LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F["
<< CurLoop->getHeader()->getParent()->getName()		<< CurLoop->getHeader()->getParent()->getName()
<< "] Noncountable Loop %"		<< "] Noncountable Loop %"
<< CurLoop->getHeader()->getName() << "\n");		<< CurLoop->getHeader()->getName() << "\n");

return recognizePopcount() \|\| recognizeAndInsertFFS();		return recognizeBCmp() \|\| recognizePopcount() \|\| recognizeAndInsertFFS();
}		}

/// Check if the given conditional branch is based on the comparison between		/// Check if the given conditional branch is based on the comparison between
/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is		/// a variable and zero, and if the variable is non-zero or zero (JmpOnZero is
/// true), the control yields to the loop entry. If the branch matches the		/// true), the control yields to the loop entry. If the branch matches the
/// behavior, the variable involved in the comparison is returned. This function		/// behavior, the variable involved in the comparison is returned. This function
/// will be called to see if the precondition and postcondition of the loop are		/// will be called to see if the precondition and postcondition of the loop are
/// in desirable form.		/// in desirable form.
▲ Show 20 Lines • Show All 657 Lines • ▼ Show 20 Lines	void LoopIdiomRecognize::transformLoopToPopcount(BasicBlock *PreCondBB,
// the loop are replaced with the NewCount -- the value returned from		// the loop are replaced with the NewCount -- the value returned from
// __builtin_ctpop().		// __builtin_ctpop().
CntInst->replaceUsesOutsideBlock(NewCount, Body);		CntInst->replaceUsesOutsideBlock(NewCount, Body);

// step 5: Forget the "non-computable" trip-count SCEV associated with the		// step 5: Forget the "non-computable" trip-count SCEV associated with the
// loop. The loop would otherwise not be deleted even if it becomes empty.		// loop. The loop would otherwise not be deleted even if it becomes empty.
SE->forgetLoop(CurLoop);		SE->forgetLoop(CurLoop);
}		}

		/// Match the terminators of the loop header and of the loop latch against
		/// the comparison-loop skeleton sketched below.
		///
		/// On success \p CmpLoop holds the header's icmp condition, the latch's cmp
		/// condition, and the successors of both conditional branches in the order
		/// in which they appear on the branch instructions. No canonicalization of
		/// the successor order is done here.
		///
		/// \returns true iff both terminators matched.
		bool LoopIdiomRecognize::matchBCmpLoopStructure(
		    CmpLoopStructure &CmpLoop) const {
		  // Only needed to satisfy the m_ICmp() matcher; the value is not used here.
		  ICmpInst::Predicate BCmpPred;

		  // We are looking for the following basic layout:
		  //  PreheaderBB: <preheader> ; preds = ???
		  //    <...>
		  //    br label %LoopHeaderBB
		  //  LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
		  //    <...>
		  //    %BCmpValue = icmp <...>
		  //    br i1 %BCmpValue, label %LoopLatchBB, label %Successor0
		  //  LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
		  //    <...>
		  //    %LatchCmpValue = <are we done, or do next iteration?>
		  //    br i1 %LatchCmpValue, label %Successor1, label %LoopHeaderBB
		  //  Successor0: <exit> ; preds = %LoopHeaderBB
		  //    <...>
		  //  Successor1: <exit> ; preds = %LoopLatchBB
		  //    <...>
		  //
		  // Successor0 and Successor1 may or may not be the same basic block.

		  // Match basic frame-work of this supposedly-comparison loop.
		  using namespace PatternMatch;
		  if (!match(CurLoop->getHeader()->getTerminator(),
		             m_Br(m_CombineAnd(m_ICmp(BCmpPred, m_Value(), m_Value()),
		                               m_Value(CmpLoop.BCmpValue)),
		                  CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB)) ||
		      !match(CurLoop->getLoopLatch()->getTerminator(),
		             m_Br(m_CombineAnd(m_Cmp(), m_Value(CmpLoop.LatchCmpValue)),
		                  CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB))) {
		    LLVM_DEBUG(dbgs() << "Basic control-flow layout unrecognized.\n");
		    return false;
		  }
		  LLVM_DEBUG(dbgs() << "Recognized basic control-flow layout.\n");
		  return true;
		}

		/// Check that \p BCmpValue is an equality icmp (eq or ne) whose two operands
		/// are both loads.
		///
		/// On success \p CmpOfLoads holds the predicate, the two load instructions
		/// and the pointer operands they load from.
		///
		/// \returns true iff the pattern matched.
		bool LoopIdiomRecognize::matchBCmpOfLoads(Value *BCmpValue,
		                                          CmpOfLoads &CmpOfLoads) const {
		  using namespace PatternMatch;
		  LLVM_DEBUG(dbgs() << "Analyzing header icmp " << *BCmpValue
		                    << " as bcmp pattern.\n");

		  // Match bcmp-style loop header cmp. It must be an eq-icmp of loads. Example:
		  //   %v0 = load <...>, <...>* %LoadSrcA
		  //   %v1 = load <...>, <...>* %LoadSrcB
		  //   %CmpLoop.BCmpValue = icmp eq <...> %v0, %v1
		  // There won't be any no-op bitcasts between load and icmp,
		  // they would have been transformed into a load of bitcast.
		  // FIXME: {b,mem}cmp() calls have the same semantics as icmp. Match them too.
		  if (!match(BCmpValue,
		             m_ICmp(CmpOfLoads.BCmpPred,
		                    m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcA)),
		                                 m_Value(CmpOfLoads.LoadA)),
		                    m_CombineAnd(m_Load(m_Value(CmpOfLoads.LoadSrcB)),
		                                 m_Value(CmpOfLoads.LoadB)))) ||
		      !ICmpInst::isEquality(CmpOfLoads.BCmpPred)) {
		    LLVM_DEBUG(dbgs() << "Loop header icmp did not match bcmp pattern.\n");
		    return false;
		  }
		  // Dereference the matched values so that the instructions themselves are
		  // printed rather than their addresses.
		  LLVM_DEBUG(dbgs() << "Recognized header icmp as bcmp pattern with loads:\n\t"
		                    << *CmpOfLoads.LoadA << "\n\t" << *CmpOfLoads.LoadB
		                    << "\n");
		  // FIXME: handle memcmp pattern?
		  return true;
		}

		bool LoopIdiomRecognize::recognizeBCmpLoopControlFlow(
		const CmpOfLoads &CmpOfLoads, CmpLoopStructure &CmpLoop) const {
		BasicBlock *LoopHeaderBB = CurLoop->getHeader();
		BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();

		// Be wary, comparisons can be inverted, canonicalize order.
		// If this 'element' comparison passed, we expect to proceed to the next elt.
		if (CmpOfLoads.BCmpPred != ICmpInst::Predicate::ICMP_EQ)
		std::swap(CmpLoop.HeaderBrEqualBB, CmpLoop.HeaderBrUnequalBB);
		// The predicate on loop latch does not matter, just canonicalize some order.
		if (CmpLoop.LatchBrContinueBB != LoopHeaderBB)
		std::swap(CmpLoop.LatchBrFinishBB, CmpLoop.LatchBrContinueBB);

		// Check that control-flow between blocks is as expected.
		if (CmpLoop.HeaderBrEqualBB != LoopLatchBB \|\|
		CmpLoop.LatchBrContinueBB != LoopHeaderBB) {
		LLVM_DEBUG(dbgs() << "Loop control-flow not recognized.\n");
		return false;
		}

		SmallVector<BasicBlock *, 2> ExitBlocks;
		CurLoop->getUniqueExitBlocks(ExitBlocks);
		assert(ExitBlocks.size() <= 2U && "Can't have more than two exit blocks.");

		assert(!is_contained(ExitBlocks, CmpLoop.HeaderBrEqualBB) &&
		is_contained(ExitBlocks, CmpLoop.HeaderBrUnequalBB) &&
		!is_contained(ExitBlocks, CmpLoop.LatchBrContinueBB) &&
		is_contained(ExitBlocks, CmpLoop.LatchBrFinishBB) &&
		"Unexpected exit edges.");

		LLVM_DEBUG(dbgs() << "Recognized loop control-flow.\n");

		LLVM_DEBUG(dbgs() << "Performing side-effect analysis on the loop.\n");
		assert(CurLoop->isLCSSAForm(*DT) && "Should only get LCSSA-form loops here.");
		// No loop instructions must be used outside of the loop. Since we are in
		// LCSSA form, we only need to check successor block's PHI nodes's incoming
		// values for incoming blocks that are the loop basic blocks.
		for (const BasicBlock *ExitBB : ExitBlocks) {
		for (const PHINode &PHI : ExitBB->phis()) {
		for (const BasicBlock *LoopBB :
		make_filter_range(PHI.blocks(), [this](BasicBlock *PredecessorBB) {
		return CurLoop->contains(PredecessorBB);
		})) {
		const auto *I =
		dyn_cast<Instruction>(PHI.getIncomingValueForBlock(LoopBB));
		if (I && CurLoop->contains(I)) {
		LLVM_DEBUG(dbgs()
		<< "Loop contains instruction " << *I
		<< " which is used outside of the loop in basic block "
		<< ExitBB->getName() << " in phi node " << PHI << "\n");
		return false;
		}
		}
		}
		}
		// Similarly, the loop should not have any other observable side-effects
		// other than the final comparison result.
		for (BasicBlock *LoopBB : CurLoop->blocks()) {
		for (Instruction &I : *LoopBB) {
		if (isa<DbgInfoIntrinsic>(I)) // Ignore dbginfo.
		continue; // FIXME: anything else? lifetime info?
		if ((I.mayHaveSideEffects() \|\| I.isAtomic() \|\| I.isFenceLike()) &&
		&I != CmpOfLoads.LoadA && &I != CmpOfLoads.LoadB) {
		LLVM_DEBUG(
		dbgs() << "Loop contains instruction with potential side-effects: "
		<< I << "\n");
		return false;
		}
		}
		}
		LLVM_DEBUG(dbgs() << "No loop instructions deemed to have side-effects.\n");
		return true;
		}

		/// Analyze the addresses of the two compared loads and the latch exit count
		/// with ScalarEvolution.
		///
		/// \param BCmpTyBytes Size in bytes of one compared element; both load
		///        addresses must advance by exactly this constant step.
		/// \param CmpOfLoads  The matched loads; only the load sources are read.
		/// \param SrcA        Set to the start value of the first load's address.
		/// \param SrcB        Set to the start value of the second load's address.
		/// \param Iterations  Set to the latch exit count plus one.
		///
		/// \returns true iff both addresses are affine add-recurrences of this loop
		/// with the expected step, and the start values and the exit count are
		/// computable and available at loop entry. \p SrcA and \p SrcB may already
		/// have been written when false is returned.
		bool LoopIdiomRecognize::recognizeBCmpLoopSCEV(uint64_t BCmpTyBytes,
		                                               CmpOfLoads &CmpOfLoads,
		                                               const SCEV *&SrcA,
		                                               const SCEV *&SrcB,
		                                               const SCEV *&Iterations) const {
		  // Try to compute SCEV of the loads, for this loop's scope.
		  const auto *ScevForSrcA = dyn_cast<SCEVAddRecExpr>(
		      SE->getSCEVAtScope(CmpOfLoads.LoadSrcA, CurLoop));
		  const auto *ScevForSrcB = dyn_cast<SCEVAddRecExpr>(
		      SE->getSCEVAtScope(CmpOfLoads.LoadSrcB, CurLoop));
		  if (!ScevForSrcA || !ScevForSrcB) {
		    LLVM_DEBUG(dbgs() << "Failed to get SCEV expressions for load sources.\n");
		    return false;
		  }

		  LLVM_DEBUG(dbgs() << "Got SCEV expressions (at loop scope) for loads:\n\t"
		                    << *ScevForSrcA << "\n\t" << *ScevForSrcB << "\n");

		  // Loads must have following SCEV exprs: {%ptr,+,BCmpTyBytes}<%LoopHeaderBB>
		  const SCEV *RecStepForA = ScevForSrcA->getStepRecurrence(*SE);
		  const SCEV *RecStepForB = ScevForSrcB->getStepRecurrence(*SE);
		  if (!ScevForSrcA->isAffine() || !ScevForSrcB->isAffine() ||
		      ScevForSrcA->getLoop() != CurLoop || ScevForSrcB->getLoop() != CurLoop ||
		      RecStepForA != RecStepForB || !isa<SCEVConstant>(RecStepForA) ||
		      cast<SCEVConstant>(RecStepForA)->getAPInt() != BCmpTyBytes) {
		    LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads. Only support "
		                         "affine SCEV expressions originating in the loop we "
		                         "are analysing with identical constant positive step, "
		                         "equal to the count of bytes compared. Got:\n\t"
		                      << *RecStepForA << "\n\t" << *RecStepForB << "\n");
		    return false;
		    // FIXME: can support BCmpTyBytes > Step.
		    // But will need to account for the extra bytes compared at the end.
		  }

		  SrcA = ScevForSrcA->getStart();
		  SrcB = ScevForSrcB->getStart();
		  LLVM_DEBUG(dbgs() << "Got SCEV expressions for load sources:\n\t" << *SrcA
		                    << "\n\t" << *SrcB << "\n");

		  // The load sources must be loop-invariants that dominate the loop header.
		  if (SrcA == SE->getCouldNotCompute() || SrcB == SE->getCouldNotCompute() ||
		      !SE->isAvailableAtLoopEntry(SrcA, CurLoop) ||
		      !SE->isAvailableAtLoopEntry(SrcB, CurLoop)) {
		    LLVM_DEBUG(dbgs() << "Unsupported SCEV expressions for loads, unavaliable "
		                         "prior to loop header.\n");
		    return false;
		  }

		  LLVM_DEBUG(dbgs() << "SCEV expressions for loads are acceptable.\n");

		  // For how many iterations is loop guaranteed not to exit via LoopLatch?
		  // This is one less than the maximal number of comparisons, and is: n + -1
		  const SCEV *LoopExitCount =
		      SE->getExitCount(CurLoop, CurLoop->getLoopLatch());
		  LLVM_DEBUG(dbgs() << "Got SCEV expression for loop latch exit count: "
		                    << *LoopExitCount << "\n");
		  // Exit count, similarly, must be a loop-invariant that dominates the loop
		  // header.
		  if (LoopExitCount == SE->getCouldNotCompute() ||
		      !LoopExitCount->getType()->isIntOrPtrTy() ||
		      !SE->isAvailableAtLoopEntry(LoopExitCount, CurLoop)) {
		    LLVM_DEBUG(dbgs() << "Unsupported SCEV expression for loop latch exit.\n");
		    return false;
		  }

		  // LoopExitCount is always one less than the actual count of iterations.
		  // Do this before cast, else we will be stuck with 1 + zext(-1 + n)
		  Iterations = SE->getAddExpr(
		      LoopExitCount, SE->getOne(LoopExitCount->getType()), SCEV::FlagNUW);
		  assert(Iterations != SE->getCouldNotCompute() &&
		         "Shouldn't fail to increment by one.");

		  LLVM_DEBUG(dbgs() << "Computed iteration count: " << *Iterations << "\n");
		  return true;
		}

		/// Return true iff the bcmp idiom is detected in the loop.
		///
		/// Additionally:
		/// 1) \p BCmpInst is set to the root byte-comparison instruction.
		/// 2) \p LatchCmpInst is set to the comparison that controls the latch.
		/// 3) \p LoadA is set to the first LoadInst.
		/// 4) \p LoadB is set to the second LoadInst.
		/// 5) \p SrcA is set to the first source location that is being compared.
		/// 6) \p SrcB is set to the second source location that is being compared.
		/// 7) \p NBytes is set to the number of bytes to compare.
		///
		/// The out-parameters may be partially written when false is returned. An
		/// optimization remark is emitted both when the idiom is recognized and when
		/// it is rejected because of unsupported loads.
		bool LoopIdiomRecognize::detectBCmpIdiom(ICmpInst *&BCmpInst,
		                                         CmpInst *&LatchCmpInst,
		                                         LoadInst *&LoadA, LoadInst *&LoadB,
		                                         const SCEV *&SrcA, const SCEV *&SrcB,
		                                         const SCEV *&NBytes) const {
		  LLVM_DEBUG(dbgs() << "Recognizing bcmp idiom\n");

		  // Give up if the loop is not in normal form, or has more than 2 blocks.
		  if (!CurLoop->isLoopSimplifyForm() || CurLoop->getNumBlocks() > 2) {
		    LLVM_DEBUG(dbgs() << "Basic loop structure unrecognized.\n");
		    return false;
		  }
		  LLVM_DEBUG(dbgs() << "Recognized basic loop structure.\n");

		  CmpLoopStructure CmpLoop;
		  if (!matchBCmpLoopStructure(CmpLoop))
		    return false;

		  CmpOfLoads CmpOfLoads;
		  if (!matchBCmpOfLoads(CmpLoop.BCmpValue, CmpOfLoads))
		    return false;

		  if (!recognizeBCmpLoopControlFlow(CmpOfLoads, CmpLoop))
		    return false;

		  // FIXME: is there no way to combine these cast with m_Value() matcher?
		  BCmpInst = cast<ICmpInst>(CmpLoop.BCmpValue);
		  LatchCmpInst = cast<CmpInst>(CmpLoop.LatchCmpValue);
		  LoadA = cast<LoadInst>(CmpOfLoads.LoadA);
		  LoadB = cast<LoadInst>(CmpOfLoads.LoadB);

		  Type *BCmpValTy = BCmpInst->getOperand(0)->getType();
		  LLVMContext &Context = BCmpValTy->getContext();
		  uint64_t BCmpTyBits = DL->getTypeSizeInBits(BCmpValTy);
		  static constexpr uint64_t ByteTyBits = 8;

		  LLVM_DEBUG(dbgs() << "Got comparison between values of type " << *BCmpValTy
		                    << " of size " << BCmpTyBits
		                    << " bits (while byte = " << ByteTyBits << " bits).\n");
		  // bcmp()/memcmp() minimal unit of work is a byte. Therefore we must check
		  // that we are dealing with a multiple of a byte here.
		  if (BCmpTyBits % ByteTyBits != 0) {
		    LLVM_DEBUG(dbgs() << "Value size is not a multiple of byte.\n");
		    return false;
		    // FIXME: could still be done under a run-time check that the total bit
		    // count is a multiple of a byte i guess? Or handle remainder separately?
		  }

		  // Each comparison is done on this many bytes.
		  uint64_t BCmpTyBytes = BCmpTyBits / ByteTyBits;
		  LLVM_DEBUG(dbgs() << "Size is exactly " << BCmpTyBytes
		                    << " bytes, eligible for bcmp conversion.\n");

		  const SCEV *Iterations;
		  if (!recognizeBCmpLoopSCEV(BCmpTyBytes, CmpOfLoads, SrcA, SrcB, Iterations))
		    return false;

		  // bcmp / memcmp take length argument as size_t, do promotion now.
		  Type *CmpFuncSizeTy = DL->getIntPtrType(Context);
		  Iterations = SE->getNoopOrZeroExtend(Iterations, CmpFuncSizeTy);
		  assert(Iterations != SE->getCouldNotCompute() && "Promotion failed.");
		  // Note that it didn't do ptrtoint cast, we will need to do it manually.

		  // We will be comparing bytes, not BCmpTy, we need to recalculate size.
		  // It's a multiplication, and it could overflow. But for it to overflow
		  // we'd want to compare more bytes than could be represented by size_t, But
		  // allocation functions also take size_t. So how'd you produce such buffer?
		  // FIXME: we likely need to actually check that we know this won't overflow,
		  // via llvm::computeOverflowForUnsignedMul().
		  NBytes = SE->getMulExpr(
		      Iterations, SE->getConstant(CmpFuncSizeTy, BCmpTyBytes), SCEV::FlagNUW);
		  assert(NBytes != SE->getCouldNotCompute() &&
		         "Shouldn't fail to multiply by the element size.");

		  LLVM_DEBUG(dbgs() << "Computed total byte count: " << *NBytes << "\n");

		  if (LoadA->getPointerAddressSpace() != LoadB->getPointerAddressSpace() ||
		      LoadA->getPointerAddressSpace() != 0 || !LoadA->isSimple() ||
		      !LoadB->isSimple()) {
		    StringLiteral L("Unsupported loads in idiom - only support identical, "
		                    "simple loads from address space 0.\n");
		    LLVM_DEBUG(dbgs() << L);
		    ORE.emit([&]() {
		      return OptimizationRemarkMissed(DEBUG_TYPE, "BCmpIdiomUnsupportedLoads",
		                                      BCmpInst->getDebugLoc(),
		                                      CurLoop->getHeader())
		             << L;
		    });
		    return false; // FIXME
		  }

		  LLVM_DEBUG(dbgs() << "Recognized bcmp idiom\n");
		  ORE.emit([&]() {
		    return OptimizationRemarkAnalysis(DEBUG_TYPE, "RecognizedBCmpIdiom",
		                                      CurLoop->getStartLoc(),
		                                      CurLoop->getHeader())
		           << "Loop recognized as a bcmp idiom";
		  });

		  return true;
		}

		/// Replace the control flow of the recognized bcmp loop with a two-way
		/// dispatch on \p ComparedEqual, and delete the loop itself.
		///
		/// \p ComparedEqual is the "call result == 0" comparison that the caller has
		/// already emitted into the loop preheader. The loop must have exactly two
		/// blocks (header and latch) and exactly two exit edges; both facts are
		/// asserted below. Every original exit edge is rerouted through a new
		/// ".equalbb" / ".unequalbb" block so that PHI nodes in the exit blocks keep
		/// the incoming values they had for the corresponding loop exit.
		///
		/// \returns the block that ends with the conditional branch on
		/// \p ComparedEqual (renamed to "<loop name>.bcmpdispatchbb").
		BasicBlock *
		LoopIdiomRecognize::transformBCmpControlFlow(ICmpInst *ComparedEqual) {
		  LLVM_DEBUG(dbgs() << "Transforming control-flow.\n");
		  SmallVector<DominatorTree::UpdateType, 8> DTUpdates;

		  BasicBlock *PreheaderBB = CurLoop->getLoopPreheader();
		  BasicBlock *HeaderBB = CurLoop->getHeader();
		  BasicBlock *LoopLatchBB = CurLoop->getLoopLatch();
		  // Copy the name now: the loop is deleted before the name is last used.
		  SmallString<32> LoopName = CurLoop->getName();
		  Function *Func = PreheaderBB->getParent();
		  LLVMContext &Context = Func->getContext();

		  // Before doing anything, drop SCEV info.
		  SE->forgetLoop(CurLoop);

		  // Here we start with: (0/6)
		  //   PreheaderBB: <preheader> ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     br label %LoopHeaderBB
		  //   LoopHeaderBB: <header,exiting> ; preds = %PreheaderBB,%LoopLatchBB
		  //     <...>
		  //     br i1 %<...>, label %LoopLatchBB, label %Successor0BB
		  //   LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
		  //     <...>
		  //     br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
		  //   Successor0BB: <exit> ; preds = %LoopHeaderBB
		  //     %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
		  //     <...>
		  //   Successor1BB: <exit> ; preds = %LoopLatchBB
		  //     %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
		  //     <...>
		  //
		  // Successor0 and Successor1 may or may not be the same basic block.

		  // Decouple the edge between loop preheader basic block and loop header basic
		  // block. Thus the loop has become unreachable.
		  assert(cast<BranchInst>(PreheaderBB->getTerminator())->isUnconditional() &&
		         PreheaderBB->getTerminator()->getSuccessor(0) == HeaderBB &&
		         "Preheader bb must end with an unconditional branch to header bb.");
		  PreheaderBB->getTerminator()->eraseFromParent();
		  DTUpdates.push_back({DominatorTree::Delete, PreheaderBB, HeaderBB});

		  // Create a new preheader basic block before loop header basic block.
		  auto *PhonyPreheaderBB = BasicBlock::Create(
		      Context, LoopName + ".phonypreheaderbb", Func, HeaderBB);
		  // And insert an unconditional branch from phony preheader basic block to
		  // loop header basic block.
		  IRBuilder<>(PhonyPreheaderBB).CreateBr(HeaderBB);
		  DTUpdates.push_back({DominatorTree::Insert, PhonyPreheaderBB, HeaderBB});

		  // Create a single new empty block that we will substitute as a
		  // successor basic block for the loop's exits. This one is temporary.
		  // Much like phony preheader basic block, it is not connected.
		  auto *PhonySuccessorBB =
		      BasicBlock::Create(Context, LoopName + ".phonysuccessorbb", Func,
		                         LoopLatchBB->getNextNode());
		  // That block must have some non-PHI instruction, or else deleteDeadLoop()
		  // will mess up cleanup of dbginfo, and verifier will complain.
		  IRBuilder<>(PhonySuccessorBB).CreateUnreachable();

		  // Create two new empty blocks that we will use to preserve the original
		  // loop exit control-flow, and preserve the incoming values in the PHI nodes
		  // in loop's successor exit blocks. These will live on.
		  auto *ComparedUnequalBB =
		      BasicBlock::Create(Context, ComparedEqual->getName() + ".unequalbb", Func,
		                         PhonySuccessorBB->getNextNode());
		  auto *ComparedEqualBB =
		      BasicBlock::Create(Context, ComparedEqual->getName() + ".equalbb", Func,
		                         PhonySuccessorBB->getNextNode());

		  // By now we have: (1/6)
		  //   PreheaderBB: ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     [no terminator instruction!]
		  //   PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
		  //     br label %LoopHeaderBB
		  //   LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
		  //     <...>
		  //     br i1 %<...>, label %LoopLatchBB, label %Successor0BB
		  //   LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
		  //     <...>
		  //     br i1 %<...>, label %Successor1BB, label %LoopHeaderBB
		  //   PhonySuccessorBB: ; No preds, UNREACHABLE!
		  //     unreachable
		  //   EqualBB: ; No preds, UNREACHABLE!
		  //     [no terminator instruction!]
		  //   UnequalBB: ; No preds, UNREACHABLE!
		  //     [no terminator instruction!]
		  //   Successor0BB: <exit> ; preds = %LoopHeaderBB
		  //     %S0PHI = phi <...> [ <...>, %LoopHeaderBB ]
		  //     <...>
		  //   Successor1BB: <exit> ; preds = %LoopLatchBB
		  //     %S1PHI = phi <...> [ <...>, %LoopLatchBB ]
		  //     <...>

		  // What is the mapping/replacement basic block for exiting out of the loop
		  // from either of old's loop basic blocks?
		  auto GetReplacementBB = [this, ComparedEqualBB,
		                           ComparedUnequalBB](const BasicBlock *OldBB) {
		    assert(CurLoop->contains(OldBB) && "Only for loop's basic blocks.");
		    if (OldBB == CurLoop->getLoopLatch()) // "all elements compared equal".
		      return ComparedEqualBB;
		    if (OldBB == CurLoop->getHeader()) // "element compared unequal".
		      return ComparedUnequalBB;
		    llvm_unreachable("Only had two basic blocks in loop.");
		  };

		  // What are the exits out of this loop?
		  SmallVector<Loop::Edge, 2> LoopExitEdges;
		  CurLoop->getExitEdges(LoopExitEdges);
		  assert(LoopExitEdges.size() == 2 && "Should have only to two exit edges.");

		  // Populate new basic blocks, update the exiting control-flow, PHI nodes.
		  for (const Loop::Edge &Edge : LoopExitEdges) {
		    // Loop::Edge stores const block pointers, but we need to mutate both ends.
		    auto *OldLoopBB = const_cast<BasicBlock *>(Edge.first);
		    auto *SuccessorBB = const_cast<BasicBlock *>(Edge.second);
		    assert(CurLoop->contains(OldLoopBB) && !CurLoop->contains(SuccessorBB) &&
		           "Unexpected edge.");

		    // If we would exit the loop from this loop's basic block,
		    // what semantically would that mean? Did comparison succeed or fail?
		    BasicBlock *NewBB = GetReplacementBB(OldLoopBB);
		    assert(NewBB->empty() && "Should not get same new basic block here twice.");
		    IRBuilder<> Builder(NewBB);
		    Builder.SetCurrentDebugLocation(OldLoopBB->getTerminator()->getDebugLoc());
		    Builder.CreateBr(SuccessorBB);
		    DTUpdates.push_back({DominatorTree::Insert, NewBB, SuccessorBB});
		    // Also, be REALLY careful with PHI nodes in successor basic block,
		    // update them to receive the same input value, but not from current loop's
		    // basic block, but from new basic block instead.
		    SuccessorBB->replacePhiUsesWith(OldLoopBB, NewBB);
		    // Also, change loop control-flow. This loop's basic block shall no longer
		    // exit from the loop to its original successor basic block, but to our new
		    // phony successor basic block. Note that new successor will be unique exit.
		    OldLoopBB->getTerminator()->replaceSuccessorWith(SuccessorBB,
		                                                     PhonySuccessorBB);
		    DTUpdates.push_back({DominatorTree::Delete, OldLoopBB, SuccessorBB});
		    DTUpdates.push_back({DominatorTree::Insert, OldLoopBB, PhonySuccessorBB});
		  }

		  // Inform DomTree about edge changes. Note that LoopInfo is still out-of-date.
		  assert(DTUpdates.size() == 8 && "Update count prediction failed.");
		  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
		  DTU.applyUpdates(DTUpdates);
		  DTUpdates.clear();

		  // By now we have: (2/6)
		  //   PreheaderBB: ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     [no terminator instruction!]
		  //   PhonyPreheaderBB: <preheader> ; No preds, UNREACHABLE!
		  //     br label %LoopHeaderBB
		  //   LoopHeaderBB: <header,exiting> ; preds = %PhonyPreheaderBB, %LoopLatchBB
		  //     <...>
		  //     br i1 %<...>, label %LoopLatchBB, label %PhonySuccessorBB
		  //   LoopLatchBB: <latch,exiting> ; preds = %LoopHeaderBB
		  //     <...>
		  //     br i1 %<...>, label %PhonySuccessorBB, label %LoopHeaderBB
		  //   PhonySuccessorBB: <uniq. exit> ; preds = %LoopHeaderBB, %LoopLatchBB
		  //     unreachable
		  //   EqualBB: ; No preds, UNREACHABLE!
		  //     br label %Successor1BB
		  //   UnequalBB: ; No preds, UNREACHABLE!
		  //     br label %Successor0BB
		  //   Successor0BB: ; preds = %UnequalBB
		  //     %S0PHI = phi <...> [ <...>, %UnequalBB ]
		  //     <...>
		  //   Successor1BB: ; preds = %EqualBB
		  //     %S1PHI = phi <...> [ <...>, %EqualBB ]
		  //     <...>

		  // Finally, zap the original loop. Record its parent loop though.
		  Loop *ParentLoop = CurLoop->getParentLoop();
		  LLVM_DEBUG(dbgs() << "Deleting old loop.\n");
		  LoopDeleter.markLoopAsDeleted(CurLoop); // Mark as deleted BEFORE deleting!
		  deleteDeadLoop(CurLoop, DT, SE, LI);    // And actually delete the loop.
		  CurLoop = nullptr;
		  // NOTE: HeaderBB and LoopLatchBB belonged to the deleted loop and must not
		  // be used from here on.

		  // By now we have: (3/6)
		  //   PreheaderBB: ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     [no terminator instruction!]
		  //   PhonyPreheaderBB: ; No preds, UNREACHABLE!
		  //     br label %PhonySuccessorBB
		  //   PhonySuccessorBB: ; preds = %PhonyPreheaderBB
		  //     unreachable
		  //   EqualBB: ; No preds, UNREACHABLE!
		  //     br label %Successor1BB
		  //   UnequalBB: ; No preds, UNREACHABLE!
		  //     br label %Successor0BB
		  //   Successor0BB: ; preds = %UnequalBB
		  //     %S0PHI = phi <...> [ <...>, %UnequalBB ]
		  //     <...>
		  //   Successor1BB: ; preds = %EqualBB
		  //     %S1PHI = phi <...> [ <...>, %EqualBB ]
		  //     <...>

		  // Now, actually restore the CFG.

		  // Insert an unconditional branch from an actual preheader basic block to
		  // phony preheader basic block.
		  IRBuilder<>(PreheaderBB).CreateBr(PhonyPreheaderBB);
		  // Record the edge that was just created. (The header block no longer
		  // exists at this point, so it must not appear in any update.)
		  DTUpdates.push_back({DominatorTree::Insert, PreheaderBB, PhonyPreheaderBB});
		  // Insert proper conditional branch from phony successor basic block to the
		  // "dispatch" basic blocks, which were used to preserve incoming values in
		  // original loop's successor basic blocks.
		  assert(isa<UnreachableInst>(PhonySuccessorBB->getTerminator()) &&
		         "Yep, that's the one we created to keep deleteDeadLoop() happy.");
		  PhonySuccessorBB->getTerminator()->eraseFromParent();
		  {
		    IRBuilder<> Builder(PhonySuccessorBB);
		    Builder.SetCurrentDebugLocation(ComparedEqual->getDebugLoc());
		    Builder.CreateCondBr(ComparedEqual, ComparedEqualBB, ComparedUnequalBB);
		  }
		  DTUpdates.push_back(
		      {DominatorTree::Insert, PhonySuccessorBB, ComparedEqualBB});
		  DTUpdates.push_back(
		      {DominatorTree::Insert, PhonySuccessorBB, ComparedUnequalBB});

		  BasicBlock *DispatchBB = PhonySuccessorBB;
		  DispatchBB->setName(LoopName + ".bcmpdispatchbb");

		  assert(DTUpdates.size() == 3 && "Update count prediction failed.");
		  DTU.applyUpdates(DTUpdates);
		  DTUpdates.clear();

		  // By now we have: (4/6)
		  //   PreheaderBB: ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     br label %PhonyPreheaderBB
		  //   PhonyPreheaderBB: ; preds = %PreheaderBB
		  //     br label %DispatchBB
		  //   DispatchBB: ; preds = %PhonyPreheaderBB
		  //     br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
		  //   EqualBB: ; preds = %DispatchBB
		  //     br label %Successor1BB
		  //   UnequalBB: ; preds = %DispatchBB
		  //     br label %Successor0BB
		  //   Successor0BB: ; preds = %UnequalBB
		  //     %S0PHI = phi <...> [ <...>, %UnequalBB ]
		  //     <...>
		  //   Successor1BB: ; preds = %EqualBB
		  //     %S1PHI = phi <...> [ <...>, %EqualBB ]
		  //     <...>

		  // The basic CFG has been restored! Now let's merge redundant basic blocks.

		  // Merge phony successor basic block into its only predecessor,
		  // phony preheader basic block. It is fully pointlessly redundant.
		  MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);

		  // By now we have: (5/6)
		  //   PreheaderBB: ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     br label %DispatchBB
		  //   DispatchBB: ; preds = %PreheaderBB
		  //     br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
		  //   EqualBB: ; preds = %DispatchBB
		  //     br label %Successor1BB
		  //   UnequalBB: ; preds = %DispatchBB
		  //     br label %Successor0BB
		  //   Successor0BB: ; preds = %UnequalBB
		  //     %S0PHI = phi <...> [ <...>, %UnequalBB ]
		  //     <...>
		  //   Successor1BB: ; preds = %EqualBB
		  //     %S1PHI = phi <...> [ <...>, %EqualBB ]
		  //     <...>

		  // Was this loop nested?
		  if (!ParentLoop) {
		    // If the loop was NOT nested, then let's also merge phony successor
		    // basic block into its only predecessor, preheader basic block.
		    // Also, here we need to update LoopInfo.
		    LI->removeBlock(PreheaderBB);
		    MergeBasicBlockIntoOnlyPred(DispatchBB, &DTU);

		    // By now we have: (6/6)
		    //   DispatchBB: ; preds = ???
		    //     <...>
		    //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		    //     %ComparedEqual = icmp eq <...> %memcmp, 0
		    //     br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
		    //   EqualBB: ; preds = %DispatchBB
		    //     br label %Successor1BB
		    //   UnequalBB: ; preds = %DispatchBB
		    //     br label %Successor0BB
		    //   Successor0BB: ; preds = %UnequalBB
		    //     %S0PHI = phi <...> [ <...>, %UnequalBB ]
		    //     <...>
		    //   Successor1BB: ; preds = %EqualBB
		    //     %S1PHI = phi <...> [ <...>, %EqualBB ]
		    //     <...>

		    return DispatchBB;
		  }

		  // Otherwise, we need to "preserve" the LoopSimplify form of the deleted loop.
		  // To achieve that, we shall keep the preheader basic block (mainly so that
		  // the loop header block will be guaranteed to have a predecessor outside of
		  // the loop), and create a phony loop with all these new three basic blocks.
		  Loop *PhonyLoop = LI->AllocateLoop();
		  ParentLoop->addChildLoop(PhonyLoop);
		  PhonyLoop->addBasicBlockToLoop(DispatchBB, *LI);
		  PhonyLoop->addBasicBlockToLoop(ComparedEqualBB, *LI);
		  PhonyLoop->addBasicBlockToLoop(ComparedUnequalBB, *LI);

		  // But we only have a preheader basic block, a header basic block and
		  // two exiting basic blocks. For a proper loop we also need a backedge from
		  // non-header basic block to header bb.
		  // Let's just add a never-taken branch from both of the exiting basic blocks.
		  for (BasicBlock *BB : {ComparedEqualBB, ComparedUnequalBB}) {
		    BranchInst *OldTerminator = cast<BranchInst>(BB->getTerminator());
		    assert(OldTerminator->isUnconditional() && "That's the one we created.");
		    BasicBlock *SuccessorBB = OldTerminator->getSuccessor(0);

		    IRBuilder<> Builder(OldTerminator);
		    Builder.SetCurrentDebugLocation(OldTerminator->getDebugLoc());
		    Builder.CreateCondBr(ConstantInt::getTrue(Context), SuccessorBB,
		                         DispatchBB);
		    OldTerminator->eraseFromParent();
		    // Yes, the backedge will never be taken. The control-flow is redundant.
		    // If it can be simplified further, other passes will take care.
		    DTUpdates.push_back({DominatorTree::Delete, BB, SuccessorBB});
		    DTUpdates.push_back({DominatorTree::Insert, BB, SuccessorBB});
		    DTUpdates.push_back({DominatorTree::Insert, BB, DispatchBB});
		  }
		  assert(DTUpdates.size() == 6 && "Update count prediction failed.");
		  DTU.applyUpdates(DTUpdates);
		  DTUpdates.clear();

		  // By now we have: (6/6)
		  //   PreheaderBB: <preheader> ; preds = ???
		  //     <...>
		  //     %memcmp = call i32 @memcmp(i8* %LoadSrcA, i8* %LoadSrcB, i64 %Nbytes)
		  //     %ComparedEqual = icmp eq <...> %memcmp, 0
		  //     br label %BCmpDispatchBB
		  //   BCmpDispatchBB: <header> ; preds = %PreheaderBB
		  //     br i1 %ComparedEqual, label %EqualBB, label %UnequalBB
		  //   EqualBB: <latch,exiting> ; preds = %BCmpDispatchBB
		  //     br i1 %true, label %Successor1BB, label %BCmpDispatchBB
		  //   UnequalBB: <latch,exiting> ; preds = %BCmpDispatchBB
		  //     br i1 %true, label %Successor0BB, label %BCmpDispatchBB
		  //   Successor0BB: ; preds = %UnequalBB
		  //     %S0PHI = phi <...> [ <...>, %UnequalBB ]
		  //     <...>
		  //   Successor1BB: ; preds = %EqualBB
		  //     %S1PHI = phi <...> [ <...>, %EqualBB ]
		  //     <...>

		  // Finally fully DONE!
		  return DispatchBB;
		}

		void LoopIdiomRecognize::transformLoopToBCmp(ICmpInst *BCmpInst,
		CmpInst *LatchCmpInst,
		LoadInst LoadA, LoadInst LoadB,
		const SCEV SrcA, const SCEV SrcB,
		const SCEV *NBytes) {
		// We will be inserting before the terminator instruction of preheader block.
		IRBuilder<> Builder(CurLoop->getLoopPreheader()->getTerminator());

		LLVM_DEBUG(dbgs() << "Transforming bcmp loop idiom into a call.\n");
		LLVM_DEBUG(dbgs() << "Emitting new instructions.\n");

		// Expand the SCEV expressions for both sources to compare, and produce value
		// for the byte len (beware of Iterations potentially being a pointer, and
		// account for element size being BCmpTyBytes bytes, which may be not 1 byte)
		Value PtrA, PtrB, *Len;
		{
		SCEVExpander SExp(SE, DL, "LoopToBCmp");
		SExp.setInsertPoint(&*Builder.GetInsertPoint());

		auto HandlePtr = [&SExp](LoadInst Load, const SCEV Src) {
		SExp.SetCurrentDebugLocation(DebugLoc());
		// If the pointer operand of original load had dbgloc - use it.
		if (const auto *I = dyn_cast<Instruction>(Load->getPointerOperand()))
		SExp.SetCurrentDebugLocation(I->getDebugLoc());
		return SExp.expandCodeFor(Src);
		};
		PtrA = HandlePtr(LoadA, SrcA);
		PtrB = HandlePtr(LoadB, SrcB);

		// For len calculation let's use dbgloc for the loop's latch condition.
		Builder.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
		SExp.SetCurrentDebugLocation(LatchCmpInst->getDebugLoc());
		Len = SExp.expandCodeFor(NBytes);

		Type *CmpFuncSizeTy = DL->getIntPtrType(Builder.getContext());
		assert(SE->getTypeSizeInBits(Len->getType()) ==
		DL->getTypeSizeInBits(CmpFuncSizeTy) &&
		"Len should already have the correct size.");

		// Make sure that iteration count is a number, insert ptrtoint cast if not.
		if (Len->getType()->isPointerTy())
		Len = Builder.CreatePtrToInt(Len, CmpFuncSizeTy);
		assert(Len->getType() == CmpFuncSizeTy && "Should have correct type now.");

		Len->setName(Len->getName() + ".bytecount");

		// There is no legality check needed. We want to compare that the memory
		// regions [PtrA, PtrA+Len) and [PtrB, PtrB+Len) are fully identical, equal.
		// For them to be fully equal, they must match bit-by-bit. And likewise,
		// for them to NOT be fully equal, they have to differ just by one bit.
		// The step of comparison (bits compared at once) simply does not matter.
		}

		// For the rest of new instructions, dbgloc should point at the value cmp.
		Builder.SetCurrentDebugLocation(BCmpInst->getDebugLoc());

		// Emit the comparison itself.
		auto *CmpCall =
		cast<CallInst>(HasBCmp ? emitBCmp(PtrA, PtrB, Len, Builder, *DL, TLI)
		: emitMemCmp(PtrA, PtrB, Len, Builder, *DL, TLI));
		// FIXME: add {B,Mem}CmpInst with MemoryCompareInst
		// (based on MemIntrinsicBase) as base?
		// FIXME: propagate metadata from loads? (alignments, AS, TBAA, ...)

		// {b,mem}cmp returned 0 if they were equal, or non-zero if not equal.
		auto *ComparedEqual = cast<ICmpInst>(Builder.CreateICmpEQ(
		CmpCall, ConstantInt::get(CmpCall->getType(), 0),
		PtrA->getName() + ".vs." + PtrB->getName() + ".eqcmp"));

		BasicBlock *BB = transformBCmpControlFlow(ComparedEqual);
		Builder.ClearInsertionPoint();

		// We're done.
		LLVM_DEBUG(dbgs() << "Transformed loop bcmp idiom into a call.\n");
		ORE.emit([&]() {
		return OptimizationRemark(DEBUG_TYPE, "TransformedBCmpIdiomToCall",
		CmpCall->getDebugLoc(), BB)
		<< "Transformed bcmp idiom into a call to "
		<< ore::NV("NewFunction", CmpCall->getCalledFunction())
		<< "() function";
		});
		++NumBCmp;
		}

		/// Recognizes a bcmp idiom in a non-countable loop.
		///
		/// If detected, transforms the relevant code to issue a call to bcmp (or
		/// memcmp, when bcmp is not available), and returns true; otherwise, returns
		/// false. Bails out immediately when neither function is available.
		bool LoopIdiomRecognize::recognizeBCmp() {
		  if (!HasMemCmp && !HasBCmp)
		    return false;

		  // Filled in by detectBCmpIdiom() on success.
		  ICmpInst *BCmpInst = nullptr;
		  CmpInst *LatchCmpInst = nullptr;
		  LoadInst *LoadA = nullptr, *LoadB = nullptr;
		  const SCEV *SrcA = nullptr, *SrcB = nullptr, *NBytes = nullptr;
		  if (!detectBCmpIdiom(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB,
		                       NBytes)) {
		    LLVM_DEBUG(dbgs() << "bcmp idiom recognition failed.\n");
		    return false;
		  }

		  transformLoopToBCmp(BCmpInst, LatchCmpInst, LoadA, LoadB, SrcA, SrcB, NBytes);
		  return true;
		}

llvm/test/Transforms/LoopIdiom/bcmp-basic.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-idiom < %s -S \| FileCheck %s		; RUN: opt -loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S \| FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"		target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; This only tests various forms of loop control-flow.		; This only tests various forms of loop control-flow.
; These tests are fully identical to bcmp()==0 otherwise.		; These tests are fully identical to bcmp()==0 otherwise.

; #include <algorithm>		; #include <algorithm>
;		;
▲ Show 20 Lines • Show All 223 Lines • ▼ Show 20 Lines
; if(v0 != v1)		; if(v0 != v1)
; return false;		; return false;
; }		; }
; return true;		; return true;
; }		; }

define i1 @_Z39pointer_iteration_const_size_no_overlapPKc(i8* %ptr) {		define i1 @_Z39pointer_iteration_const_size_no_overlapPKc(i8* %ptr) {
; CHECK-LABEL: @_Z39pointer_iteration_const_size_no_overlapPKc(		; CHECK-LABEL: @_Z39pointer_iteration_const_size_no_overlapPKc(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 8		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 8
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR]], i8* [[ADD_PTR]], i64 8)
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[ADD_PTR]], [[ENTRY:%.*]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_IDX:%.]] = phi i64 [ [[__FIRST1_ADDR_06_I_I_ADD:%.]], [[FOR_INC_I_I]] ], [ 0, [[ENTRY]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[__FIRST1_ADDR_06_I_I_IDX]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I_PTR]]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_ADD]] = add nuw nsw i64 [[__FIRST1_ADDR_06_I_I_IDX]], 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i64 [[__FIRST1_ADDR_06_I_I_ADD]], 8
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8
br label %for.body.i.i		br label %for.body.i.i

for.body.i.i: ; preds = %for.inc.i.i, %entry		for.body.i.i: ; preds = %for.inc.i.i, %entry
%__first2.addr.07.i.i = phi i8* [ %incdec.ptr1.i.i, %for.inc.i.i ], [ %add.ptr, %entry ]		%__first2.addr.07.i.i = phi i8* [ %incdec.ptr1.i.i, %for.inc.i.i ], [ %add.ptr, %entry ]
Show All 12 Lines

_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i
%retval.0.i.i = phi i1 [ false, %for.body.i.i ], [ true, %for.inc.i.i ]		%retval.0.i.i = phi i1 [ false, %for.body.i.i ], [ true, %for.inc.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z44pointer_iteration_const_size_partial_overlapPKc(i8* %ptr) {		define i1 @_Z44pointer_iteration_const_size_partial_overlapPKc(i8* %ptr) {
; CHECK-LABEL: @_Z44pointer_iteration_const_size_partial_overlapPKc(		; CHECK-LABEL: @_Z44pointer_iteration_const_size_partial_overlapPKc(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR1:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 8		; CHECK-NEXT: [[ADD_PTR1:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 8
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR]], i8* [[ADD_PTR1]], i64 16)
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR_VS_ADD_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[ADD_PTR1]], [[ENTRY:%.*]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR1_EQCMP]], label [[PTR_VS_ADD_PTR1_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_IDX:%.]] = phi i64 [ [[__FIRST1_ADDR_06_I_I_ADD:%.]], [[FOR_INC_I_I]] ], [ 0, [[ENTRY]] ]		; CHECK: ptr.vs.add.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[__FIRST1_ADDR_06_I_I_IDX]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I_PTR]]		; CHECK: ptr.vs.add.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_ADD]] = add nuw nsw i64 [[__FIRST1_ADDR_06_I_I_IDX]], 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i64 [[__FIRST1_ADDR_06_I_I_ADD]], 16
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%add.ptr1 = getelementptr inbounds i8, i8* %ptr, i64 8		%add.ptr1 = getelementptr inbounds i8, i8* %ptr, i64 8
br label %for.body.i.i		br label %for.body.i.i

for.body.i.i: ; preds = %for.inc.i.i, %entry		for.body.i.i: ; preds = %for.inc.i.i, %entry
%__first2.addr.07.i.i = phi i8* [ %incdec.ptr1.i.i, %for.inc.i.i ], [ %add.ptr1, %entry ]		%__first2.addr.07.i.i = phi i8* [ %incdec.ptr1.i.i, %for.inc.i.i ], [ %add.ptr1, %entry ]
Show All 12 Lines

_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i
%retval.0.i.i = phi i1 [ false, %for.body.i.i ], [ true, %for.inc.i.i ]		%retval.0.i.i = phi i1 [ false, %for.body.i.i ], [ true, %for.inc.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z44pointer_iteration_const_size_overlap_unknownPKcS0_(i8* %ptr0, i8* %ptr1) {		define i1 @_Z44pointer_iteration_const_size_overlap_unknownPKcS0_(i8* %ptr0, i8* %ptr1) {
; CHECK-LABEL: @_Z44pointer_iteration_const_size_overlap_unknownPKcS0_(		; CHECK-LABEL: @_Z44pointer_iteration_const_size_overlap_unknownPKcS0_(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0:%.]], i8 [[PTR1:%.*]], i64 8)
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.]], [[ENTRY:%.]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_IDX:%.]] = phi i64 [ [[__FIRST1_ADDR_06_I_I_ADD:%.]], [[FOR_INC_I_I]] ], [ 0, [[ENTRY]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.*]], i64 [[__FIRST1_ADDR_06_I_I_IDX]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I_PTR]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I_ADD]] = add nuw nsw i64 [[__FIRST1_ADDR_06_I_I_IDX]], 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i64 [[__FIRST1_ADDR_06_I_I_ADD]], 8
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
br label %for.body.i.i		br label %for.body.i.i

for.body.i.i: ; preds = %for.inc.i.i, %entry		for.body.i.i: ; preds = %for.inc.i.i, %entry
%__first2.addr.07.i.i = phi i8* [ %incdec.ptr1.i.i, %for.inc.i.i ], [ %ptr1, %entry ]		%__first2.addr.07.i.i = phi i8* [ %incdec.ptr1.i.i, %for.inc.i.i ], [ %ptr1, %entry ]
%__first1.addr.06.i.i.idx = phi i64 [ %__first1.addr.06.i.i.add, %for.inc.i.i ], [ 0, %entry ]		%__first1.addr.06.i.i.idx = phi i64 [ %__first1.addr.06.i.i.add, %for.inc.i.i ], [ 0, %entry ]
Show All 12 Lines
_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i
%retval.0.i.i = phi i1 [ false, %for.body.i.i ], [ true, %for.inc.i.i ]		%retval.0.i.i = phi i1 [ false, %for.body.i.i ], [ true, %for.inc.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z42pointer_iteration_variable_size_no_overlapPKcm(i8* %ptr, i64 %count) {		define i1 @_Z42pointer_iteration_variable_size_no_overlapPKcm(i8* %ptr, i64 %count) {
; CHECK-LABEL: @_Z42pointer_iteration_variable_size_no_overlapPKcm(		; CHECK-LABEL: @_Z42pointer_iteration_variable_size_no_overlapPKcm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.]], i64 [[COUNT_BYTECOUNT:%.]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR]], i8* [[ADD_PTR]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[ADD_PTR]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[V0]], [[V1]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8* [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count
%cmp5.i.i = icmp eq i64 %count, 0		%cmp5.i.i = icmp eq i64 %count, 0
Show All 16 Lines
_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry
%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]		%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z47pointer_iteration_variable_size_partial_overlapPKcm(i8* %ptr, i64 %count) {		define i1 @_Z47pointer_iteration_variable_size_partial_overlapPKcm(i8* %ptr, i64 %count) {
; CHECK-LABEL: @_Z47pointer_iteration_variable_size_partial_overlapPKcm(		; CHECK-LABEL: @_Z47pointer_iteration_variable_size_partial_overlapPKcm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[COUNT:%.*]], 1		; CHECK-NEXT: [[MUL_BYTECOUNT:%.*]] = shl i64 [[COUNT:%.*]], 1
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[MUL]]		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[MUL_BYTECOUNT]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[MUL]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[MUL_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]], label [[FOR_BODY_I_I_PREHEADER:%.*]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.*]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[COUNT]]		; CHECK-NEXT: [[ADD_PTR1:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[COUNT]]
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR1]], i64 [[MUL_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR_VS_ADD_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.*]] = phi i8* [ [[INCDEC_PTR1_I_I:%.*]], [[FOR_INC_I_I:%.*]] ], [ [[ADD_PTR1]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR1_EQCMP]], label [[PTR_VS_ADD_PTR1_EQCMP_EQUALBB:%.*]], label [[PTR_VS_ADD_PTR1_EQCMP_UNEQUALBB:%.*]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.*]] = phi i8* [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr.vs.add.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[V0:%.*]] = load i8, i8* [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr.vs.add.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[V0]], [[V1]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8* [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%mul = shl i64 %count, 1		%mul = shl i64 %count, 1
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %mul		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %mul
Show All 21 Lines
_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry
%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]		%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z47pointer_iteration_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {		define i1 @_Z47pointer_iteration_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z47pointer_iteration_variable_size_overlap_unknownPKcS0_m(		; CHECK-LABEL: @_Z47pointer_iteration_variable_size_overlap_unknownPKcS0_m(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR0:%.*]], i64 [[COUNT:%.*]]		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR0:%.*]], i64 [[COUNT_BYTECOUNT:%.*]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]], label [[FOR_BODY_I_I_PREHEADER:%.*]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.*]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR0]], i8* [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.*]] = phi i8* [ [[INCDEC_PTR1_I_I:%.*]], [[FOR_INC_I_I:%.*]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.*]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.*]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.*]] = phi i8* [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[V0:%.*]] = load i8, i8* [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[V0]], [[V1]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8* [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr0, i64 %count		%add.ptr = getelementptr inbounds i8, i8* %ptr0, i64 %count
%cmp5.i.i = icmp eq i64 %count, 0		%cmp5.i.i = icmp eq i64 %count, 0
Show All 15 Lines

_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry
%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]		%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z40index_iteration_eq_const_size_no_overlapPKc(i8* %ptr) {		define i1 @_Z40index_iteration_eq_const_size_no_overlapPKc(i8* %ptr) {
; CHECK-LABEL: @_Z40index_iteration_eq_const_size_no_overlapPKc(		; CHECK-LABEL: @_Z40index_iteration_eq_const_size_no_overlapPKc(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 8)
; CHECK: for.cond:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], 8		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK: for.body:		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[I_013:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INC]], [[FOR_COND:%.*]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[I_013]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[ADD_PTR]], i64 [[I_013]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8
br label %for.body		br label %for.body

for.cond: ; preds = %for.body		for.cond: ; preds = %for.body
%cmp = icmp ult i64 %inc, 8		%cmp = icmp ult i64 %inc, 8
Show All 11 Lines

cleanup: ; preds = %for.cond, %for.body		cleanup: ; preds = %for.cond, %for.body
%res = phi i1 [ false, %for.body ], [ true, %for.cond ]		%res = phi i1 [ false, %for.body ], [ true, %for.cond ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z45index_iteration_eq_const_size_partial_overlapPKc(i8* %ptr) {		define i1 @_Z45index_iteration_eq_const_size_partial_overlapPKc(i8* %ptr) {
; CHECK-LABEL: @_Z45index_iteration_eq_const_size_partial_overlapPKc(		; CHECK-LABEL: @_Z45index_iteration_eq_const_size_partial_overlapPKc(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 16)
; CHECK: for.cond:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], 16		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK: for.body:		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[I_013:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INC]], [[FOR_COND:%.*]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[I_013]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[ADD_PTR]], i64 [[I_013]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8
br label %for.body		br label %for.body

for.cond: ; preds = %for.body		for.cond: ; preds = %for.body
%cmp = icmp ult i64 %inc, 16		%cmp = icmp ult i64 %inc, 16
Show All 11 Lines

cleanup: ; preds = %for.cond, %for.body		cleanup: ; preds = %for.cond, %for.body
%res = phi i1 [ false, %for.body ], [ true, %for.cond ]		%res = phi i1 [ false, %for.body ], [ true, %for.cond ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z45index_iteration_eq_const_size_overlap_unknownPKcS0_(i8* %ptr0, i8* %ptr1) {		define i1 @_Z45index_iteration_eq_const_size_overlap_unknownPKcS0_(i8* %ptr0, i8* %ptr1) {
; CHECK-LABEL: @_Z45index_iteration_eq_const_size_overlap_unknownPKcS0_(		; CHECK-LABEL: @_Z45index_iteration_eq_const_size_overlap_unknownPKcS0_(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR0:%.*]], i8* [[PTR1:%.*]], i64 8)
; CHECK: for.cond:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], 8		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK: for.body:		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[I_08:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INC]], [[FOR_COND:%.*]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.*]], i64 [[I_08]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[PTR1:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
br label %for.body		br label %for.body

for.cond: ; preds = %for.body		for.cond: ; preds = %for.body
%cmp = icmp ult i64 %inc, 8		%cmp = icmp ult i64 %inc, 8
br i1 %cmp, label %for.body, label %cleanup		br i1 %cmp, label %for.body, label %cleanup
Show All 11 Lines
cleanup: ; preds = %for.cond, %for.body		cleanup: ; preds = %for.cond, %for.body
%res = phi i1 [ false, %for.body ], [ true, %for.cond ]		%res = phi i1 [ false, %for.body ], [ true, %for.cond ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z43index_iteration_eq_variable_size_no_overlapPKcm(i8* %ptr, i64 %count) {		define i1 @_Z43index_iteration_eq_variable_size_no_overlapPKcm(i8* %ptr, i64 %count) {
; CHECK-LABEL: @_Z43index_iteration_eq_variable_size_no_overlapPKcm(		; CHECK-LABEL: @_Z43index_iteration_eq_variable_size_no_overlapPKcm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.]], i64 [[COUNT_BYTECOUNT:%.]]
; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.]], label [[FOR_BODY_BCMPDISPATCHBB:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.cond:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], [[COUNT]]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT:%.*]]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK: for.body:		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[I_015:%.]] = phi i64 [ [[INC]], [[FOR_COND:%.]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[I_015]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[ADD_PTR]], i64 [[I_015]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_015]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count
%cmp14 = icmp eq i64 %count, 0		%cmp14 = icmp eq i64 %count, 0
Show All 17 Lines	cleanup: ; preds = %for.body, %for.cond, %entry
%res = phi i1 [ true, %entry ], [ true, %for.cond ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.cond ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z48index_iteration_eq_variable_size_partial_overlapPKcm(i8* %ptr, i64 %count) {		define i1 @_Z48index_iteration_eq_variable_size_partial_overlapPKcm(i8* %ptr, i64 %count) {
; CHECK-LABEL: @_Z48index_iteration_eq_variable_size_partial_overlapPKcm(		; CHECK-LABEL: @_Z48index_iteration_eq_variable_size_partial_overlapPKcm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.]], i64 [[COUNT:%.]]
; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[COUNT]], 1		; CHECK-NEXT: [[MUL_BYTECOUNT:%.*]] = shl i64 [[COUNT]], 1
; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[MUL]], 0		; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[MUL_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.]], label [[FOR_BODY_BCMPDISPATCHBB:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 [[MUL_BYTECOUNT]])
; CHECK: for.cond:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], [[MUL]]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT:%.*]]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK: for.body:		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[I_015:%.]] = phi i64 [ [[INC]], [[FOR_COND:%.]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[I_015]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[ADD_PTR]], i64 [[I_015]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_015]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count
%mul = shl i64 %count, 1		%mul = shl i64 %count, 1
Show All 17 Lines
cleanup: ; preds = %for.body, %for.cond, %entry		cleanup: ; preds = %for.body, %for.cond, %entry
%res = phi i1 [ true, %entry ], [ true, %for.cond ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.cond ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z48index_iteration_eq_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {		define i1 @_Z48index_iteration_eq_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z48index_iteration_eq_variable_size_overlap_unknownPKcS0_m(		; CHECK-LABEL: @_Z48index_iteration_eq_variable_size_overlap_unknownPKcS0_m(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.]] = icmp eq i64 [[COUNT:%.]], 0		; CHECK-NEXT: [[CMP8:%.]] = icmp eq i64 [[COUNT_BYTECOUNT:%.]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP8]], label [[CLEANUP:%.]], label [[FOR_BODY_BCMPDISPATCHBB:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR0:%.*]], i8* [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.cond:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], [[COUNT]]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT:%.*]]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK: for.body:		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[I_09:%.]] = phi i64 [ [[INC]], [[FOR_COND:%.]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.*]], i64 [[I_09]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[PTR1:%.*]], i64 [[I_09]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_09]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%cmp8 = icmp eq i64 %count, 0		%cmp8 = icmp eq i64 %count, 0
br i1 %cmp8, label %cleanup, label %for.body		br i1 %cmp8, label %cleanup, label %for.body
Show All 14 Lines

cleanup: ; preds = %for.body, %for.cond, %entry		cleanup: ; preds = %for.body, %for.cond, %entry
%res = phi i1 [ true, %entry ], [ true, %for.cond ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.cond ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z38index_iteration_starting_from_negativePKcS0_(i8* %ptr0, i8* %ptr1) {		define i1 @_Z38index_iteration_starting_from_negativePKcS0_(i8* %ptr0, i8* %ptr1) {
; CHECK-LABEL: @_Z38index_iteration_starting_from_negativePKcS0_(		; CHECK-LABEL: @_Z38index_iteration_starting_from_negativePKcS0_(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[PTR0:%.*]], i64 -4
; CHECK: for.cond:		; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, i8* [[PTR1:%.*]], i64 -4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT:%.*]], 4		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[SCEVGEP]], i8* [[SCEVGEP1]], i64 8)
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP:%.*]]		; CHECK-NEXT: [[SCEVGEP_VS_SCEVGEP1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK: for.body:		; CHECK-NEXT: br i1 [[SCEVGEP_VS_SCEVGEP1_EQCMP]], label [[SCEVGEP_VS_SCEVGEP1_EQCMP_EQUALBB:%.]], label [[SCEVGEP_VS_SCEVGEP1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ -4, [[ENTRY:%.]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND:%.*]] ]		; CHECK: scevgep.vs.scevgep1.eqcmp.equalbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.*]], i64 [[INDVARS_IV]]		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]		; CHECK: scevgep.vs.scevgep1.eqcmp.unequalbb:
; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i8, i8 [[PTR1:%.*]], i64 [[INDVARS_IV]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX2]]
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_COND]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RET:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]		; CHECK-NEXT: [[RET:%.*]] = phi i1 [ false, [[SCEVGEP_VS_SCEVGEP1_EQCMP_UNEQUALBB]] ], [ true, [[SCEVGEP_VS_SCEVGEP1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RET]]		; CHECK-NEXT: ret i1 [[RET]]
;		;
entry:		entry:
br label %for.body		br label %for.body

for.cond: ; preds = %for.body		for.cond: ; preds = %for.body
%cmp = icmp slt i64 %indvars.iv.next, 4		%cmp = icmp slt i64 %indvars.iv.next, 4
br i1 %cmp, label %for.body, label %cleanup		br i1 %cmp, label %for.body, label %cleanup
Show All 10 Lines

cleanup: ; preds = %for.cond, %for.body		cleanup: ; preds = %for.cond, %for.body
%ret = phi i1 [ false, %for.body ], [ true, %for.cond ]		%ret = phi i1 [ false, %for.body ], [ true, %for.cond ]
ret i1 %ret		ret i1 %ret
}		}

define i1 @_Z43combined_iteration_eq_const_size_no_overlapPKc(i8* %ptr) {		define i1 @_Z43combined_iteration_eq_const_size_no_overlapPKc(i8* %ptr) {
; CHECK-LABEL: @_Z43combined_iteration_eq_const_size_no_overlapPKc(		; CHECK-LABEL: @_Z43combined_iteration_eq_const_size_no_overlapPKc(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 8)
; CHECK: for.body:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_015:%.]] = phi i64 [ 0, [[ENTRY:%.]] ], [ [[INC:%.]], [[FOR_INC:%.]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[PTR1_014:%.]] = phi i8 [ [[ADD_PTR]], [[ENTRY]] ], [ [[INCDEC_PTR3:%.*]], [[FOR_INC]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK-NEXT: [[PTR0_013:%.]] = phi i8 [ [[PTR]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ]		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[PTR0_013]]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[PTR1_014]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_015]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_013]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_014]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 8
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8
br label %for.body		br label %for.body

for.body: ; preds = %entry, %for.inc		for.body: ; preds = %entry, %for.inc
%i.015 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]		%i.015 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
Show All 13 Lines

cleanup: ; preds = %for.inc, %for.body		cleanup: ; preds = %for.inc, %for.body
%res = phi i1 [ false, %for.body ], [ true, %for.inc ]		%res = phi i1 [ false, %for.body ], [ true, %for.inc ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z48combined_iteration_eq_const_size_partial_overlapPKc(i8* %ptr) {		define i1 @_Z48combined_iteration_eq_const_size_partial_overlapPKc(i8* %ptr) {
; CHECK-LABEL: @_Z48combined_iteration_eq_const_size_partial_overlapPKc(		; CHECK-LABEL: @_Z48combined_iteration_eq_const_size_partial_overlapPKc(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 8
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 16)
; CHECK: for.body:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_015:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.*]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.*]]
; CHECK-NEXT: [[PTR1_014:%.*]] = phi i8* [ [[ADD_PTR]], [[ENTRY]] ], [ [[INCDEC_PTR3:%.*]], [[FOR_INC]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK-NEXT: [[PTR0_013:%.*]] = phi i8* [ [[PTR]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ]		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[V0:%.*]] = load i8, i8* [[PTR0_013]]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[PTR1_014]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_015]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_013]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_014]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 16
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8		%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 8
br label %for.body		br label %for.body

for.body: ; preds = %entry, %for.inc		for.body: ; preds = %entry, %for.inc
%i.015 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]		%i.015 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
Show All 13 Lines

cleanup: ; preds = %for.inc, %for.body		cleanup: ; preds = %for.inc, %for.body
%res = phi i1 [ false, %for.body ], [ true, %for.inc ]		%res = phi i1 [ false, %for.body ], [ true, %for.inc ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z48combined_iteration_eq_const_size_overlap_unknownPKcS0_(i8* %ptr0, i8* %ptr1) {		define i1 @_Z48combined_iteration_eq_const_size_overlap_unknownPKcS0_(i8* %ptr0, i8* %ptr1) {
; CHECK-LABEL: @_Z48combined_iteration_eq_const_size_overlap_unknownPKcS0_(		; CHECK-LABEL: @_Z48combined_iteration_eq_const_size_overlap_unknownPKcS0_(
; CHECK-NEXT: entry:		; CHECK-NEXT: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR0:%.*]], i8* [[PTR1:%.*]], i64 8)
; CHECK: for.body:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_010:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.*]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.*]]
; CHECK-NEXT: [[PTR1_ADDR_09:%.*]] = phi i8* [ [[PTR1:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR3:%.*]], [[FOR_INC]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[PTR0_ADDR_08:%.*]] = phi i8* [ [[PTR0:%.*]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ]		; CHECK-NEXT: br label [[CLEANUP:%.*]]
; CHECK-NEXT: [[V0:%.*]] = load i8, i8* [[PTR0_ADDR_08]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[PTR1_ADDR_09]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_010]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_ADDR_08]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_ADDR_09]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 8
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
br label %for.body		br label %for.body

for.body: ; preds = %entry, %for.inc		for.body: ; preds = %entry, %for.inc
%i.010 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]		%i.010 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%ptr1.addr.09 = phi i8* [ %ptr1, %entry ], [ %incdec.ptr3, %for.inc ]		%ptr1.addr.09 = phi i8* [ %ptr1, %entry ], [ %incdec.ptr3, %for.inc ]
Show All 13 Lines
cleanup: ; preds = %for.inc, %for.body		cleanup: ; preds = %for.inc, %for.body
%res = phi i1 [ false, %for.body ], [ true, %for.inc ]		%res = phi i1 [ false, %for.body ], [ true, %for.inc ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z46combined_iteration_eq_variable_size_no_overlapPKcm(i8* %ptr, i64 %count) {		define i1 @_Z46combined_iteration_eq_variable_size_no_overlapPKcm(i8* %ptr, i64 %count) {
; CHECK-LABEL: @_Z46combined_iteration_eq_variable_size_no_overlapPKcm(		; CHECK-LABEL: @_Z46combined_iteration_eq_variable_size_no_overlapPKcm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[COUNT:%.*]], 0		; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT:%.*]], 0
; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]		; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.*]], label [[FOR_BODY_BCMPDISPATCHBB:%.*]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[COUNT]]		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[COUNT_BYTECOUNT]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_017:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.*]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.*]]
; CHECK-NEXT: [[PTR1_016:%.*]] = phi i8* [ [[INCDEC_PTR3:%.*]], [[FOR_INC]] ], [ [[ADD_PTR]], [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK-NEXT: [[PTR0_015:%.*]] = phi i8* [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V0:%.*]] = load i8, i8* [[PTR0_015]]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[PTR1_016]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_017]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_015]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_016]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], [[COUNT]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%cmp14 = icmp eq i64 %count, 0		%cmp14 = icmp eq i64 %count, 0
br i1 %cmp14, label %cleanup, label %for.body.preheader		br i1 %cmp14, label %cleanup, label %for.body.preheader
Show All 21 Lines
cleanup: ; preds = %for.body, %for.inc, %entry		cleanup: ; preds = %for.body, %for.inc, %entry
%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z51combined_iteration_eq_variable_size_partial_overlapPKcm(i8* %ptr, i64 %count) {		define i1 @_Z51combined_iteration_eq_variable_size_partial_overlapPKcm(i8* %ptr, i64 %count) {
; CHECK-LABEL: @_Z51combined_iteration_eq_variable_size_partial_overlapPKcm(		; CHECK-LABEL: @_Z51combined_iteration_eq_variable_size_partial_overlapPKcm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.]] = shl i64 [[COUNT:%.]], 1		; CHECK-NEXT: [[MUL_BYTECOUNT:%.]] = shl i64 [[COUNT:%.]], 1
; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[MUL]], 0		; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[MUL_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.]], label [[FOR_BODY_BCMPDISPATCHBB:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 [[COUNT]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR:%.*]], i64 [[COUNT]]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR]], i8* [[ADD_PTR]], i64 [[MUL_BYTECOUNT]])
; CHECK: for.body:		; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_017:%.]] = phi i64 [ [[INC:%.]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[PTR1_016:%.]] = phi i8 [ [[INCDEC_PTR3:%.*]], [[FOR_INC]] ], [ [[ADD_PTR]], [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
; CHECK-NEXT: [[PTR0_015:%.]] = phi i8 [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[PTR0_015]]		; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[PTR1_016]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_017]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_015]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_016]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], [[MUL]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%mul = shl i64 %count, 1		%mul = shl i64 %count, 1
%cmp14 = icmp eq i64 %mul, 0		%cmp14 = icmp eq i64 %mul, 0
Show All 22 Lines
cleanup: ; preds = %for.body, %for.inc, %entry		cleanup: ; preds = %for.body, %for.inc, %entry
%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z51combined_iteration_eq_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {		define i1 @_Z51combined_iteration_eq_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z51combined_iteration_eq_variable_size_overlap_unknownPKcS0_m(		; CHECK-LABEL: @_Z51combined_iteration_eq_variable_size_overlap_unknownPKcS0_m(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.]] = icmp eq i64 [[COUNT:%.]], 0		; CHECK-NEXT: [[CMP8:%.]] = icmp eq i64 [[COUNT_BYTECOUNT:%.]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP8]], label [[CLEANUP:%.]], label [[FOR_BODY_BCMPDISPATCHBB:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0:%.]], i8 [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_011:%.]] = phi i64 [ [[INC:%.]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[PTR1_ADDR_010:%.]] = phi i8 [ [[INCDEC_PTR3:%.]], [[FOR_INC]] ], [ [[PTR1:%.]], [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[PTR0_ADDR_09:%.]] = phi i8 [ [[INCDEC_PTR:%.]], [[FOR_INC]] ], [ [[PTR0:%.]], [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[PTR0_ADDR_09]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[PTR1_ADDR_010]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_011]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_ADDR_09]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_ADDR_010]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], [[COUNT]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%cmp8 = icmp eq i64 %count, 0		%cmp8 = icmp eq i64 %count, 0
br i1 %cmp8, label %cleanup, label %for.body		br i1 %cmp8, label %cleanup, label %for.body
Show All 17 Lines
cleanup: ; preds = %for.body, %for.inc, %entry		cleanup: ; preds = %for.body, %for.inc, %entry
%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @_Z55negated_pointer_iteration_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {		define i1 @_Z55negated_pointer_iteration_variable_size_overlap_unknownPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z55negated_pointer_iteration_variable_size_overlap_unknownPKcS0_m(		; CHECK-LABEL: @_Z55negated_pointer_iteration_variable_size_overlap_unknownPKcS0_m(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.]], i64 [[COUNT_BYTECOUNT:%.]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0]], i8* [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[T0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[T1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T0]], [[T1]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp eq i8* [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ true, [[FOR_BODY_I_I]] ], [ false, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ true, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ false, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.]] = phi i1 [ false, [[ENTRY:%.]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.]] = phi i1 [ false, [[ENTRY:%.]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr0, i64 %count		%add.ptr = getelementptr inbounds i8, i8* %ptr0, i64 %count
%cmp5.i.i = icmp eq i64 %count, 0		%cmp5.i.i = icmp eq i64 %count, 0
Show All 18 Lines	_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z55integer_pointer_iteration_variable_size_overlap_unknownPKiS0_m(i32* %ptr0, i32* %ptr1, i64 %count) {		define i1 @_Z55integer_pointer_iteration_variable_size_overlap_unknownPKiS0_m(i32* %ptr0, i32* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z55integer_pointer_iteration_variable_size_overlap_unknownPKiS0_m(		; CHECK-LABEL: @_Z55integer_pointer_iteration_variable_size_overlap_unknownPKiS0_m(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i32, i32 [[PTR0:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i32, i32 [[PTR0:%.]], i64 [[COUNT:%.]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[COUNT]], 2
; CHECK: for.body.i.i:		; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], -4
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i32 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i32 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2
; CHECK-NEXT: [[T0:%.]] = load i32, i32 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: [[DOTBYTECOUNT:%.*]] = add i64 [[TMP3]], 4
; CHECK-NEXT: [[T1:%.]] = load i32, i32 [[__FIRST2_ADDR_07_I_I]]		; CHECK-NEXT: [[CSTR:%.]] = bitcast i32 [[PTR0]] to i8*
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i32 [[T0]], [[T1]]		; CHECK-NEXT: [[CSTR1:%.]] = bitcast i32 [[PTR1:%.]] to i8
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[CSTR]], i8* [[CSTR1]], i64 [[DOTBYTECOUNT]])
; CHECK: for.inc.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i32, i32* [[__FIRST1_ADDR_06_I_I]], i64 1		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i32, i32* [[__FIRST2_ADDR_07_I_I]], i64 1		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i32 [[INCDEC_PTR_I_I]], [[ADD_PTR]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK: _ZNSt3__15equalIPKiS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKiS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKiS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKiS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKIS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i32, i32* %ptr0, i64 %count		%add.ptr = getelementptr inbounds i32, i32* %ptr0, i64 %count
%cmp5.i.i = icmp eq i64 %count, 0		%cmp5.i.i = icmp eq i64 %count, 0
Show All 17 Lines	_ZNSt3__15equalIPKiS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry
%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]		%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i1 @_Z21small_index_iterationPKcS0_i(i8* %ptr0, i8* %ptr1, i32 %count) {		define i1 @_Z21small_index_iterationPKcS0_i(i8* %ptr0, i8* %ptr1, i32 %count) {
; CHECK-LABEL: @_Z21small_index_iterationPKcS0_i(		; CHECK-LABEL: @_Z21small_index_iterationPKcS0_i(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP8:%.]] = icmp sgt i32 [[COUNT:%.]], 0		; CHECK-NEXT: [[CMP8:%.]] = icmp sgt i32 [[COUNT:%.]], 0
; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.]], label [[CLEANUP:%.]]		; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_BCMPDISPATCHBB:%.]], label [[CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[DOTBYTECOUNT:%.*]] = zext i32 [[COUNT]] to i64
; CHECK: for.body:		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0:%.]], i8 [[PTR1:%.*]], i64 [[DOTBYTECOUNT]])
; CHECK-NEXT: [[I_011:%.]] = phi i32 [ [[INC:%.]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[PTR1_ADDR_010:%.]] = phi i8 [ [[INCDEC_PTR3:%.]], [[FOR_INC]] ], [ [[PTR1:%.]], [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[PTR0_ADDR_09:%.]] = phi i8 [ [[INCDEC_PTR:%.]], [[FOR_INC]] ], [ [[PTR0:%.]], [[FOR_BODY_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[T0:%.]] = load i8, i8 [[PTR0_ADDR_09]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[T1:%.]] = load i8, i8 [[PTR1_ADDR_010]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[T0]], [[T1]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_INC]], label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, i8* [[PTR0_ADDR_09]], i64 1
; CHECK-NEXT: [[INCDEC_PTR3]] = getelementptr inbounds i8, i8* [[PTR1_ADDR_010]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[COUNT]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[T2_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[T2_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[T2:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[T2_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[T2:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[T2_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[T2]]		; CHECK-NEXT: ret i1 [[T2]]
;		;
entry:		entry:
%cmp8 = icmp sgt i32 %count, 0		%cmp8 = icmp sgt i32 %count, 0
br i1 %cmp8, label %for.body, label %cleanup		br i1 %cmp8, label %for.body, label %cleanup
Show All 17 Lines
cleanup: ; preds = %for.body, %for.inc, %entry		cleanup: ; preds = %for.body, %for.inc, %entry
%t2 = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]		%t2 = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]
ret i1 %t2		ret i1 %t2
}		}

define i1 @_Z23three_pointer_iterationPKcS0_S0_(i8* %ptr0, i8* %ptr0_end, i8* %ptr1) {		define i1 @_Z23three_pointer_iterationPKcS0_S0_(i8* %ptr0, i8* %ptr0_end, i8* %ptr1) {
; CHECK-LABEL: @_Z23three_pointer_iterationPKcS0_S0_(		; CHECK-LABEL: @_Z23three_pointer_iterationPKcS0_S0_(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP5_I_I:%.]] = icmp eq i8 [[PTR0:%.]], [[PTR0_END:%.]]		; CHECK-NEXT: [[PTR01:%.]] = ptrtoint i8 [[PTR0:%.*]] to i64
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_PREHEADER:%.]]		; CHECK-NEXT: [[CMP5_I_I:%.]] = icmp eq i8 [[PTR0]], [[PTR0_END:%.*]]
; CHECK: for.body.i.i.preheader:		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.]]
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK: for.body.i.i:		; CHECK-NEXT: [[TMP0:%.*]] = sub i64 0, [[PTR01]]
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: [[SCEVGEP:%.]] = getelementptr i8, i8 [[PTR0_END]], i64 [[TMP0]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: [[DOTBYTECOUNT:%.]] = ptrtoint i8 [[SCEVGEP]] to i64
; CHECK-NEXT: [[T0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0]], i8* [[PTR1:%.*]], i64 [[DOTBYTECOUNT]])
; CHECK-NEXT: [[T1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T0]], [[T1]]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK: for.inc.i.i:		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[PTR0_END]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]		; CHECK-NEXT: ret i1 [[RETVAL_0_I_I]]
;		;
entry:		entry:
%cmp5.i.i = icmp eq i8* %ptr0, %ptr0_end		%cmp5.i.i = icmp eq i8* %ptr0, %ptr0_end
br i1 %cmp5.i.i, label %_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit, label %for.body.i.i		br i1 %cmp5.i.i, label %_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit, label %for.body.i.i
Show All 15 Lines
_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.body.i.i, %for.inc.i.i, %entry
%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]		%retval.0.i.i = phi i1 [ true, %entry ], [ true, %for.inc.i.i ], [ false, %for.body.i.i ]
ret i1 %retval.0.i.i		ret i1 %retval.0.i.i
}		}

define i32 @_Z17value_propagationPKcS0_mii(i8* %ptr0, i8* %ptr1, i64 %count, i32 %on_equal, i32 %on_unequal) {		define i32 @_Z17value_propagationPKcS0_mii(i8* %ptr0, i8* %ptr1, i64 %count, i32 %on_equal, i32 %on_unequal) {
; CHECK-LABEL: @_Z17value_propagationPKcS0_mii(		; CHECK-LABEL: @_Z17value_propagationPKcS0_mii(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR0:%.*]], i64 [[COUNT:%.*]]		; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR0:%.*]], i64 [[COUNT_BYTECOUNT:%.*]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]], label [[FOR_BODY_I_I_PREHEADER:%.*]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.*]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0]], i8* [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[T0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[T1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T0]], [[T1]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[T2_PH:%.]] = phi i32 [ [[ON_UNEQUAL:%.]], [[FOR_BODY_I_I]] ], [ [[ON_EQUAL:%.*]], [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[T2_PH:%.]] = phi i32 [ [[ON_UNEQUAL:%.]], [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ [[ON_EQUAL:%.*]], [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[T2:%.]] = phi i32 [ [[ON_EQUAL]], [[ENTRY:%.]] ], [ [[T2_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[T2:%.]] = phi i32 [ [[ON_EQUAL]], [[ENTRY:%.]] ], [ [[T2_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[T2]]		; CHECK-NEXT: ret i32 [[T2]]
;		;
entry:		entry:
%add.ptr = getelementptr inbounds i8, i8* %ptr0, i64 %count		%add.ptr = getelementptr inbounds i8, i8* %ptr0, i64 %count
%cmp5.i.i = icmp eq i64 %count, 0		%cmp5.i.i = icmp eq i64 %count, 0
Show All 16 Lines
_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.inc.i.i, %for.body.i.i, %entry		_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit: ; preds = %for.inc.i.i, %for.body.i.i, %entry
%t2 = phi i32 [ %on_equal, %entry ], [ %on_equal, %for.inc.i.i ], [ %on_unequal, %for.body.i.i ]		%t2 = phi i32 [ %on_equal, %entry ], [ %on_equal, %for.inc.i.i ], [ %on_unequal, %for.body.i.i ]
ret i32 %t2		ret i32 %t2
}		}

define void @_Z20multiple_exit_blocksPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {		define void @_Z20multiple_exit_blocksPKcS0_m(i8* %ptr0, i8* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z20multiple_exit_blocksPKcS0_m(		; CHECK-LABEL: @_Z20multiple_exit_blocksPKcS0_m(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.]], i64 [[COUNT_BYTECOUNT:%.]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[IF_END:%.]], label [[FOR_BODY_I_I_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[IF_END:%.]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0]], i8* [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[T0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[IF_END_LOOPEXIT:%.*]]
; CHECK-NEXT: [[T1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T0]], [[T1]]		; CHECK-NEXT: br label [[IF_THEN:%.*]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[IF_THEN:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODY_I_I]]
; CHECK: if.then:		; CHECK: if.then:
; CHECK-NEXT: tail call void @_Z17callee_on_unequalv()		; CHECK-NEXT: tail call void @_Z17callee_on_unequalv()
; CHECK-NEXT: br label [[RETURN:%.*]]		; CHECK-NEXT: br label [[RETURN:%.*]]
; CHECK: if.end.loopexit:		; CHECK: if.end.loopexit:
; CHECK-NEXT: br label [[IF_END]]		; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:		; CHECK: if.end:
; CHECK-NEXT: tail call void @_Z17callee_on_successv()		; CHECK-NEXT: tail call void @_Z17callee_on_successv()
; CHECK-NEXT: br label [[RETURN]]		; CHECK-NEXT: br label [[RETURN]]
Show All 31 Lines	return: ; preds = %if.end, %if.then
ret void		ret void
}		}
declare void @_Z17callee_on_unequalv()		declare void @_Z17callee_on_unequalv()
declare void @_Z17callee_on_successv()		declare void @_Z17callee_on_successv()

define void @_Z13multiple_phisPKcS0_mS0_S0_S0_S0_PS0_S1_(i8* %ptr0, i8* %ptr1, i64 %count, i8* %v0, i8* %v1, i8* %v2, i8* %v3, i8** %out0, i8** %out1) {		define void @_Z13multiple_phisPKcS0_mS0_S0_S0_S0_PS0_S1_(i8* %ptr0, i8* %ptr1, i64 %count, i8* %v0, i8* %v1, i8* %v2, i8* %v3, i8** %out0, i8** %out1) {
; CHECK-LABEL: @_Z13multiple_phisPKcS0_mS0_S0_S0_S0_PS0_S1_(		; CHECK-LABEL: @_Z13multiple_phisPKcS0_mS0_S0_S0_S0_PS0_S1_(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.]], i64 [[COUNT:%.]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.]], i64 [[COUNT_BYTECOUNT:%.]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.]], label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0]], i8* [[PTR1:%.*]], i64 [[COUNT_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[PTR1:%.*]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[PTR0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[T0:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK-NEXT: [[T1:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T0]], [[T1]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]
; CHECK: for.inc.i.i:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[T2_PH:%.*]] = phi i8* [ [[V2:%.*]], [[FOR_BODY_I_I]] ], [ [[V0:%.*]], [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[T2_PH:%.*]] = phi i8* [ [[V2:%.*]], [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ [[V0:%.*]], [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: [[T3_PH:%.*]] = phi i8* [ [[V3:%.*]], [[FOR_BODY_I_I]] ], [ [[V1:%.*]], [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[T3_PH:%.*]] = phi i8* [ [[V3:%.*]], [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ [[V1:%.*]], [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[T2:%.*]] = phi i8* [ [[V0]], [[ENTRY:%.*]] ], [ [[T2_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[T2:%.*]] = phi i8* [ [[V0]], [[ENTRY:%.*]] ], [ [[T2_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: [[T3:%.*]] = phi i8* [ [[V1]], [[ENTRY]] ], [ [[T3_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[T3:%.*]] = phi i8* [ [[V1]], [[ENTRY]] ], [ [[T3_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: store i8* [[T2]], i8** [[OUT0:%.*]]		; CHECK-NEXT: store i8* [[T2]], i8** [[OUT0:%.*]]
; CHECK-NEXT: store i8* [[T3]], i8** [[OUT1:%.*]]		; CHECK-NEXT: store i8* [[T3]], i8** [[OUT1:%.*]]
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
Show All 35 Lines
; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]		; CHECK-NEXT: br label [[FOR_COND_CLEANUP]]
; CHECK: for.cond.cleanup:		; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I_012:%.]] = phi i64 [ [[INC:%.]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: [[I_012:%.]] = phi i64 [ [[INC:%.]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8** [[PTR0:%.*]], i64 [[I_012]]		; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8** [[PTR0:%.*]], i64 [[I_012]]
; CHECK-NEXT: [[T0:%.]] = load i8, i8** [[ARRAYIDX]]		; CHECK-NEXT: [[T0:%.]] = load i8, i8** [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i64, i64 [[COUNT:%.*]], i64 [[I_012]]		; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i64, i64 [[COUNT:%.*]], i64 [[I_012]]
; CHECK-NEXT: [[T1:%.]] = load i64, i64 [[ARRAYIDX2]]		; CHECK-NEXT: [[T1_BYTECOUNT:%.]] = load i64, i64 [[ARRAYIDX2]]
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[T0]], i64 [[T1]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[T0]], i64 [[T1_BYTECOUNT]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[T1]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[T1_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I_PREHEADER:%.*]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I_PREHEADER:%.*]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.preheader:
; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i8, i8** [[PTR1:%.*]], i64 [[I_012]]		; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i8, i8** [[PTR1:%.*]], i64 [[I_012]]
; CHECK-NEXT: [[T2:%.]] = load i8, i8** [[ARRAYIDX3]]		; CHECK-NEXT: [[T2:%.]] = load i8, i8** [[ARRAYIDX3]]
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[T0]], i8* [[T2]], i64 [[T1_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[T0_VS_T2_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[T2]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.*]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[T0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: [[T3:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br i1 [[T0_VS_T2_EQCMP]], label [[T0_VS_T2_EQCMP_EQUALBB:%.]], label [[T0_VS_T2_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[T4:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: t0.vs.t2.eqcmp.equalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T3]], [[T4]]		; CHECK-NEXT: br i1 true, label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]]		; CHECK: t0.vs.t2.eqcmp.unequalbb:
; CHECK: for.inc.i.i:		; CHECK-NEXT: br i1 true, label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I_BCMPDISPATCHBB]]
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]		; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[T0_VS_T2_EQCMP_UNEQUALBB]] ], [ true, [[T0_VS_T2_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]		; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]]
; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:		; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[FOR_BODY]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]		; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[FOR_BODY]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ]
; CHECK-NEXT: tail call void @_Z4sinkb(i1 [[RETVAL_0_I_I]])		; CHECK-NEXT: tail call void @_Z4sinkb(i1 [[RETVAL_0_I_I]])
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_012]], 1		; CHECK-NEXT: [[INC]] = add nuw i64 [[I_012]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INC]], [[OUTER_COUNT]]		; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INC]], [[OUTER_COUNT]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]		; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]]
;		;
▲ Show 20 Lines • Show All 49 Lines • ▼ Show 20 Lines
; CHECK-NEXT: br i1 [[CMP11]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP11]], label [[CLEANUP:%.]], label [[FOR_BODY_PREHEADER:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:		; CHECK: for.body:
; CHECK-NEXT: [[I_012:%.]] = phi i64 [ [[INC:%.]], [[IF_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: [[I_012:%.]] = phi i64 [ [[INC:%.]], [[IF_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8** [[PTR0:%.*]], i64 [[I_012]]		; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8** [[PTR0:%.*]], i64 [[I_012]]
; CHECK-NEXT: [[T0:%.]] = load i8, i8** [[ARRAYIDX]]		; CHECK-NEXT: [[T0:%.]] = load i8, i8** [[ARRAYIDX]]
; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i64, i64 [[COUNT:%.*]], i64 [[I_012]]		; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i64, i64 [[COUNT:%.*]], i64 [[I_012]]
; CHECK-NEXT: [[T1:%.]] = load i64, i64 [[ARRAYIDX2]]		; CHECK-NEXT: [[T1_BYTECOUNT:%.]] = load i64, i64 [[ARRAYIDX2]]
; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[T0]], i64 [[T1]]		; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[T0]], i64 [[T1_BYTECOUNT]]
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[T1]], 0		; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[T1_BYTECOUNT]], 0
; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[IF_END]], label [[FOR_BODY_I_I_PREHEADER:%.*]]		; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[IF_END]], label [[FOR_BODY_I_I_PREHEADER:%.*]]
; CHECK: for.body.i.i.preheader:		; CHECK: for.body.i.i.preheader:
; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i8, i8** [[PTR1:%.*]], i64 [[I_012]]		; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i8, i8** [[PTR1:%.*]], i64 [[I_012]]
; CHECK-NEXT: [[T2:%.]] = load i8, i8** [[ARRAYIDX3]]		; CHECK-NEXT: [[T2:%.]] = load i8, i8** [[ARRAYIDX3]]
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[T0]], i8* [[T2]], i64 [[T1_BYTECOUNT]])
; CHECK: for.body.i.i:		; CHECK-NEXT: [[T0_VS_T2_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[T2]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK-NEXT: br label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.*]]
; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[T0]], [[FOR_BODY_I_I_PREHEADER]] ]		; CHECK: for.body.i.i.bcmpdispatchbb:
; CHECK-NEXT: [[T3:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]]		; CHECK-NEXT: br i1 [[T0_VS_T2_EQCMP]], label [[T0_VS_T2_EQCMP_EQUALBB:%.]], label [[T0_VS_T2_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[T4:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]]		; CHECK: t0.vs.t2.eqcmp.equalbb:
; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T3]], [[T4]]		; CHECK-NEXT: br i1 true, label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB]]
; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[IF_THEN:%.*]]		; CHECK: t0.vs.t2.eqcmp.unequalbb:
; CHECK: for.inc.i.i:		; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB]]
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1
; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1
; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[ADD_PTR]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label [[IF_END_LOOPEXIT:%.*]], label [[FOR_BODY_I_I]]
; CHECK: if.then:		; CHECK: if.then:
; CHECK-NEXT: tail call void @_Z17callee_on_unequalv()		; CHECK-NEXT: tail call void @_Z17callee_on_unequalv()
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: if.end.loopexit:		; CHECK: if.end.loopexit:
; CHECK-NEXT: br label [[IF_END]]		; CHECK-NEXT: br label [[IF_END]]
; CHECK: if.end:		; CHECK: if.end:
; CHECK-NEXT: tail call void @_Z17callee_on_successv()		; CHECK-NEXT: tail call void @_Z17callee_on_successv()
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_012]], 1		; CHECK-NEXT: [[INC]] = add nuw i64 [[I_012]], 1
▲ Show 20 Lines • Show All 53 Lines • ▼ Show 20 Lines

define void @_Z21endless_loop_if_equalPiS_(i32* %a, i32* %b) {		define void @_Z21endless_loop_if_equalPiS_(i32* %a, i32* %b) {
; CHECK-LABEL: @_Z21endless_loop_if_equalPiS_(		; CHECK-LABEL: @_Z21endless_loop_if_equalPiS_(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]		; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond.loopexit:		; CHECK: for.cond.loopexit:
; CHECK-NEXT: br label [[FOR_COND]]		; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.cond:		; CHECK: for.cond:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[CSTR:%.]] = bitcast i32 [[A:%.]] to i8
; CHECK: for.cond1:		; CHECK-NEXT: [[CSTR1:%.]] = bitcast i32 [[B:%.]] to i8
; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INDVARS_IV_NEXT:%.]], 4		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[CSTR]], i8* [[CSTR1]], i64 16)
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_LOOPEXIT:%.*]]		; CHECK-NEXT: [[A_VS_B_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK: for.body:		; CHECK-NEXT: br label [[FOR_BODY_BCMPDISPATCHBB:%.*]]
; CHECK-NEXT: [[INDVARS_IV:%.]] = phi i64 [ 0, [[FOR_COND]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND1:%.]] ]		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i32, i32 [[A:%.*]], i64 [[INDVARS_IV]]		; CHECK-NEXT: br i1 [[A_VS_B_EQCMP]], label [[A_VS_B_EQCMP_EQUALBB:%.]], label [[A_VS_B_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[TMP0:%.]] = load i32, i32 [[ARRAYIDX]]		; CHECK: a.vs.b.eqcmp.equalbb:
; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i32, i32 [[B:%.*]], i64 [[INDVARS_IV]]		; CHECK-NEXT: br i1 true, label [[FOR_COND_LOOPEXIT:%.*]], label [[FOR_BODY_BCMPDISPATCHBB]]
; CHECK-NEXT: [[TMP1:%.]] = load i32, i32 [[ARRAYIDX3]]		; CHECK: a.vs.b.eqcmp.unequalbb:
; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]]		; CHECK-NEXT: br i1 true, label [[RETURN:%.*]], label [[FOR_BODY_BCMPDISPATCHBB]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_COND1]], label [[RETURN:%.*]]
; CHECK: return:		; CHECK: return:
; CHECK-NEXT: ret void		; CHECK-NEXT: ret void
;		;
entry:		entry:
br label %for.cond		br label %for.cond

for.cond: ; preds = %for.cond1, %entry		for.cond: ; preds = %for.cond1, %entry
br label %for.body		br label %for.body
Show All 15 Lines
return: ; preds = %for.body		return: ; preds = %for.body
ret void		ret void
}		}

define i1 @_Z21load_of_bitcastsPKcPKfm(i8* %ptr0, float* %ptr1, i64 %count) {		define i1 @_Z21load_of_bitcastsPKcPKfm(i8* %ptr0, float* %ptr1, i64 %count) {
; CHECK-LABEL: @_Z21load_of_bitcastsPKcPKfm(		; CHECK-LABEL: @_Z21load_of_bitcastsPKcPKfm(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP13:%.]] = icmp eq i64 [[COUNT:%.]], 0		; CHECK-NEXT: [[CMP13:%.]] = icmp eq i64 [[COUNT:%.]], 0
; CHECK-NEXT: br i1 [[CMP13]], label [[CLEANUP3:%.]], label [[FOR_BODY_PREHEADER:%.]]		; CHECK-NEXT: br i1 [[CMP13]], label [[CLEANUP3:%.]], label [[FOR_BODY_BCMPDISPATCHBB:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[DOTBYTECOUNT:%.*]] = shl nuw i64 [[COUNT]], 2
; CHECK: for.body:		; CHECK-NEXT: [[CSTR:%.]] = bitcast float [[PTR1:%.]] to i8
; CHECK-NEXT: [[PTR0_ADDR_016:%.]] = phi i8 [ [[ADD_PTR:%.]], [[FOR_INC:%.]] ], [ [[PTR0:%.*]], [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0:%.]], i8 [[CSTR]], i64 [[DOTBYTECOUNT]])
; CHECK-NEXT: [[I_015:%.]] = phi i64 [ [[INC:%.]], [[FOR_INC]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[PTR1_ADDR_014:%.]] = phi float [ [[INCDEC_PTR:%.]], [[FOR_INC]] ], [ [[PTR1:%.]], [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[V0_0__SROA_CAST:%.]] = bitcast i8 [[PTR0_ADDR_016]] to i32*		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[V0_0_COPYLOAD:%.]] = load i32, i32 [[V0_0__SROA_CAST]]		; CHECK-NEXT: br label [[CLEANUP3_LOOPEXIT:%.*]]
; CHECK-NEXT: [[V1_0__SROA_CAST:%.]] = bitcast float [[PTR1_ADDR_014]] to i32*		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[V1_0_COPYLOAD:%.]] = load i32, i32 [[V1_0__SROA_CAST]]		; CHECK-NEXT: br label [[CLEANUP3_LOOPEXIT]]
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[V0_0_COPYLOAD]], [[V1_0_COPYLOAD]]
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_INC]], label [[CLEANUP3_LOOPEXIT:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[INC]] = add nuw i64 [[I_015]], 1
; CHECK-NEXT: [[ADD_PTR]] = getelementptr inbounds i8, i8* [[PTR0_ADDR_016]], i64 4
; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds float, float* [[PTR1_ADDR_014]], i64 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], [[COUNT]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP3_LOOPEXIT]]
; CHECK: cleanup3.loopexit:		; CHECK: cleanup3.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_INC]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ], [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP3]]		; CHECK-NEXT: br label [[CLEANUP3]]
; CHECK: cleanup3:		; CHECK: cleanup3:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP3_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ true, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP3_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
%cmp13 = icmp eq i64 %count, 0		%cmp13 = icmp eq i64 %count, 0
br i1 %cmp13, label %cleanup3, label %for.body		br i1 %cmp13, label %cleanup3, label %for.body
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines
cleanup4: ; preds = %for.body, %for.inc, %entry		cleanup4: ; preds = %for.body, %for.inc, %entry
%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]		%res = phi i1 [ true, %entry ], [ true, %for.inc ], [ false, %for.body ]
ret i1 %res		ret i1 %res
}		}

define i1 @exit_block_is_not_dedicated(i8* %ptr0, i8* %ptr1) {		define i1 @exit_block_is_not_dedicated(i8* %ptr0, i8* %ptr1) {
; CHECK-LABEL: @exit_block_is_not_dedicated(		; CHECK-LABEL: @exit_block_is_not_dedicated(
; CHECK-NEXT: entry:		; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 true, label [[FOR_BODY_PREHEADER:%.]], label [[CLEANUP:%.]]		; CHECK-NEXT: br i1 true, label [[FOR_BODY_BCMPDISPATCHBB:%.]], label [[CLEANUP:%.]]
; CHECK: for.body.preheader:		; CHECK: for.body.bcmpdispatchbb:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]		; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[PTR0:%.]], i8 [[PTR1:%.*]], i64 8)
; CHECK: for.body:		; CHECK-NEXT: [[PTR0_VS_PTR1_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0
; CHECK-NEXT: [[I_08:%.]] = phi i64 [ [[INC:%.]], [[FOR_COND:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ]		; CHECK-NEXT: br i1 [[PTR0_VS_PTR1_EQCMP]], label [[PTR0_VS_PTR1_EQCMP_EQUALBB:%.]], label [[PTR0_VS_PTR1_EQCMP_UNEQUALBB:%.]]
; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR0:%.*]], i64 [[I_08]]		; CHECK: ptr0.vs.ptr1.eqcmp.equalbb:
; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[PTR1:%.*]], i64 [[I_08]]		; CHECK: ptr0.vs.ptr1.eqcmp.unequalbb:
; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]]		; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]]
; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]]
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP_LOOPEXIT:%.*]]
; CHECK: for.cond:
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INC]], 8
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]]
; CHECK: cleanup.loopexit:		; CHECK: cleanup.loopexit:
; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ true, [[FOR_COND]] ], [ false, [[FOR_BODY]] ]		; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ true, [[PTR0_VS_PTR1_EQCMP_EQUALBB]] ], [ false, [[PTR0_VS_PTR1_EQCMP_UNEQUALBB]] ]
; CHECK-NEXT: br label [[CLEANUP]]		; CHECK-NEXT: br label [[CLEANUP]]
; CHECK: cleanup:		; CHECK: cleanup:
; CHECK-NEXT: [[RES:%.]] = phi i1 [ false, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]		; CHECK-NEXT: [[RES:%.]] = phi i1 [ false, [[ENTRY:%.]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ]
; CHECK-NEXT: ret i1 [[RES]]		; CHECK-NEXT: ret i1 [[RES]]
;		;
entry:		entry:
br i1 true, label %for.body, label %cleanup		br i1 true, label %for.body, label %cleanup

Show All 18 Lines

llvm/test/Transforms/LoopIdiom/bcmp-debugify-remarks.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -debugify -loop-idiom < %s -S 2>&1 | FileCheck %s			; RUN: opt -debugify -loop-idiom -pass-remarks=loop-idiom -pass-remarks-analysis=loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S 2>&1 | FileCheck %s

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

	; Check that everything still works when debuginfo is present, and that it is reasonably propagated.			; Check that everything still works when debuginfo is present, and that it is reasonably propagated.

	; #include <algorithm>			; #include <algorithm>
	;			;
	; bool index_iteration_eq_variable_size_no_overlap(char const* ptr, size_t count) {			; bool index_iteration_eq_variable_size_no_overlap(char const* ptr, size_t count) {
	; char const* ptr0 = ptr;			; char const* ptr0 = ptr;
	; char const* ptr1 = ptr + count;			; char const* ptr1 = ptr + count;
	; for(size_t i = 0; i < count; i++) {			; for(size_t i = 0; i < count; i++) {
	; if(ptr0[i] != ptr1[i])			; if(ptr0[i] != ptr1[i])
	; return false;			; return false;
	; }			; }
	; return true;			; return true;
	; }			; }
	;			;
	; void sink(bool);			; void sink(bool);
	; void loop_within_loop(size_t outer_count, char const** ptr0, char const** ptr1, size_t* count) {			; void loop_within_loop(size_t outer_count, char const** ptr0, char const** ptr1, size_t* count) {
	; for(size_t i = 0; i != outer_count; ++i)			; for(size_t i = 0; i != outer_count; ++i)
	; sink(std::equal(ptr0[i], ptr0[i] + count[i], ptr1[i]));			; sink(std::equal(ptr0[i], ptr0[i] + count[i], ptr1[i]));
	; }			; }

				; CHECK: remark: <stdin>:13:1: Loop recognized as a bcmp idiom
				; CHECK: remark: <stdin>:11:1: Transformed bcmp idiom into a call to memcmp() function
				; CHECK: remark: <stdin>:29:1: Loop recognized as a bcmp idiom
				; CHECK: remark: <stdin>:34:1: Transformed bcmp idiom into a call to memcmp() function

	define i1 @_Z43index_iteration_eq_variable_size_no_overlapPKcm(i8* nocapture %ptr, i64 %count) {			define i1 @_Z43index_iteration_eq_variable_size_no_overlapPKcm(i8* nocapture %ptr, i64 %count) {
	; CHECK-LABEL: @_Z43index_iteration_eq_variable_size_no_overlapPKcm(			; CHECK-LABEL: @_Z43index_iteration_eq_variable_size_no_overlapPKcm(
	; CHECK-NEXT: entry:			; CHECK-NEXT: entry:
	; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[COUNT:%.*]], !dbg !22			; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[COUNT_BYTECOUNT:%.*]], !dbg !22
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[ADD_PTR]], metadata !9, metadata !DIExpression()), !dbg !22			; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[ADD_PTR]], metadata !9, metadata !DIExpression()), !dbg !22
	; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[COUNT]], 0, !dbg !23			; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i64 [[COUNT_BYTECOUNT]], 0, !dbg !23
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP14]], metadata !11, metadata !DIExpression()), !dbg !23			; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP14]], metadata !11, metadata !DIExpression()), !dbg !23
	; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg !24			; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.*]], label [[FOR_BODY_BCMPDISPATCHBB:%.*]], !dbg !24
	; CHECK: for.body.preheader:			; CHECK: for.body.bcmpdispatchbb:
	; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg !25			; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* [[PTR]], i8* [[ADD_PTR]], i64 [[COUNT_BYTECOUNT]]), !dbg !25
	; CHECK: for.cond:			; CHECK-NEXT: [[PTR_VS_ADD_PTR_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0, !dbg !25
	; CHECK-NEXT: [[CMP:%.]] = icmp ult i64 [[INC:%.]], [[COUNT]], !dbg !26			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !14, metadata !DIExpression()), !dbg !26
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !13, metadata !DIExpression()), !dbg !26			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !15, metadata !DIExpression()), !dbg !27
	; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT:%.*]], !dbg !27			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !16, metadata !DIExpression()), !dbg !28
	; CHECK: for.body:			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !17, metadata !DIExpression()), !dbg !29
	; CHECK-NEXT: [[I_015:%.]] = phi i64 [ [[INC]], [[FOR_COND:%.]] ], [ 0, [[FOR_BODY_PREHEADER]] ], !dbg !28			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !18, metadata !DIExpression()), !dbg !30
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I_015]], metadata !14, metadata !DIExpression()), !dbg !28			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !19, metadata !DIExpression()), !dbg !25
	; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8 [[PTR]], i64 [[I_015]], !dbg !29			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !20, metadata !DIExpression()), !dbg !31
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[ARRAYIDX]], metadata !15, metadata !DIExpression()), !dbg !29			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !13, metadata !DIExpression()), !dbg !32
	; CHECK-NEXT: [[V0:%.]] = load i8, i8 [[ARRAYIDX]], !dbg !30			; CHECK-NEXT: br i1 [[PTR_VS_ADD_PTR_EQCMP]], label [[PTR_VS_ADD_PTR_EQCMP_EQUALBB:%.]], label [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB:%.]], !dbg !25
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[V0]], metadata !16, metadata !DIExpression()), !dbg !30			; CHECK: ptr.vs.add.ptr.eqcmp.equalbb:
	; CHECK-NEXT: [[ARRAYIDX1:%.]] = getelementptr inbounds i8, i8 [[ADD_PTR]], i64 [[I_015]], !dbg !31			; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT:%.*]], !dbg !33
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[ARRAYIDX1]], metadata !17, metadata !DIExpression()), !dbg !31			; CHECK: ptr.vs.add.ptr.eqcmp.unequalbb:
	; CHECK-NEXT: [[V1:%.]] = load i8, i8 [[ARRAYIDX1]], !dbg !32			; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]], !dbg !34
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[V1]], metadata !18, metadata !DIExpression()), !dbg !32
	; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[V0]], [[V1]], !dbg !33
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata !19, metadata !DIExpression()), !dbg !33
	; CHECK-NEXT: [[INC]] = add nuw i64 [[I_015]], 1, !dbg !34
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INC]], metadata !20, metadata !DIExpression()), !dbg !34
	; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_COND]], label [[CLEANUP_LOOPEXIT]], !dbg !25
	; CHECK: cleanup.loopexit:			; CHECK: cleanup.loopexit:
	; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[FOR_BODY]] ], [ true, [[FOR_COND]] ]			; CHECK-NEXT: [[RES_PH:%.*]] = phi i1 [ false, [[PTR_VS_ADD_PTR_EQCMP_UNEQUALBB]] ], [ true, [[PTR_VS_ADD_PTR_EQCMP_EQUALBB]] ]
	; CHECK-NEXT: br label [[CLEANUP]], !dbg !35			; CHECK-NEXT: br label [[CLEANUP]], !dbg !35
	; CHECK: cleanup:			; CHECK: cleanup:
	; CHECK-NEXT: [[RES:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ], !dbg !36			; CHECK-NEXT: [[RES:%.*]] = phi i1 [ true, [[ENTRY:%.*]] ], [ [[RES_PH]], [[CLEANUP_LOOPEXIT]] ], !dbg !36
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[RES]], metadata !21, metadata !DIExpression()), !dbg !36			; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[RES]], metadata !21, metadata !DIExpression()), !dbg !36
	; CHECK-NEXT: ret i1 [[RES]], !dbg !35			; CHECK-NEXT: ret i1 [[RES]], !dbg !35
	;			;
	entry:			entry:
	%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count			%add.ptr = getelementptr inbounds i8, i8* %ptr, i64 %count
	Show All 35 Lines
	; CHECK-NEXT: [[I_012:%.]] = phi i64 [ [[INC:%.]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ], !dbg !64			; CHECK-NEXT: [[I_012:%.]] = phi i64 [ [[INC:%.]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ], !dbg !64
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I_012]], metadata !40, metadata !DIExpression()), !dbg !64			; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I_012]], metadata !40, metadata !DIExpression()), !dbg !64
	; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8** [[PTR0:%.*]], i64 [[I_012]], !dbg !65			; CHECK-NEXT: [[ARRAYIDX:%.]] = getelementptr inbounds i8, i8** [[PTR0:%.*]], i64 [[I_012]], !dbg !65
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8** [[ARRAYIDX]], metadata !41, metadata !DIExpression()), !dbg !65			; CHECK-NEXT: call void @llvm.dbg.value(metadata i8** [[ARRAYIDX]], metadata !41, metadata !DIExpression()), !dbg !65
	; CHECK-NEXT: [[T0:%.]] = load i8, i8** [[ARRAYIDX]], !dbg !66			; CHECK-NEXT: [[T0:%.]] = load i8, i8** [[ARRAYIDX]], !dbg !66
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[T0]], metadata !42, metadata !DIExpression()), !dbg !66			; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[T0]], metadata !42, metadata !DIExpression()), !dbg !66
	; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i64, i64 [[COUNT:%.*]], i64 [[I_012]], !dbg !67			; CHECK-NEXT: [[ARRAYIDX2:%.]] = getelementptr inbounds i64, i64 [[COUNT:%.*]], i64 [[I_012]], !dbg !67
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i64* [[ARRAYIDX2]], metadata !43, metadata !DIExpression()), !dbg !67			; CHECK-NEXT: call void @llvm.dbg.value(metadata i64* [[ARRAYIDX2]], metadata !43, metadata !DIExpression()), !dbg !67
	; CHECK-NEXT: [[T1:%.]] = load i64, i64 [[ARRAYIDX2]], !dbg !68			; CHECK-NEXT: [[T1_BYTECOUNT:%.]] = load i64, i64 [[ARRAYIDX2]], !dbg !68
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[T1]], metadata !44, metadata !DIExpression()), !dbg !68			; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[T1_BYTECOUNT]], metadata !44, metadata !DIExpression()), !dbg !68
	; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[T0]], i64 [[T1]], !dbg !69			; CHECK-NEXT: [[ADD_PTR:%.]] = getelementptr inbounds i8, i8 [[T0]], i64 [[T1_BYTECOUNT]], !dbg !69
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[ADD_PTR]], metadata !45, metadata !DIExpression()), !dbg !69			; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[ADD_PTR]], metadata !45, metadata !DIExpression()), !dbg !69
	; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[T1]], 0, !dbg !70			; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i64 [[T1_BYTECOUNT]], 0, !dbg !70
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP5_I_I]], metadata !46, metadata !DIExpression()), !dbg !70			; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP5_I_I]], metadata !46, metadata !DIExpression()), !dbg !70
	; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I_PREHEADER:%.*]], !dbg !62			; CHECK-NEXT: br i1 [[CMP5_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], label [[FOR_BODY_I_I_PREHEADER:%.*]], !dbg !62
	; CHECK: for.body.i.i.preheader:			; CHECK: for.body.i.i.preheader:
	; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i8, i8** [[PTR1:%.*]], i64 [[I_012]], !dbg !71			; CHECK-NEXT: [[ARRAYIDX3:%.]] = getelementptr inbounds i8, i8** [[PTR1:%.*]], i64 [[I_012]], !dbg !71
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8** [[ARRAYIDX3]], metadata !47, metadata !DIExpression()), !dbg !71			; CHECK-NEXT: call void @llvm.dbg.value(metadata i8** [[ARRAYIDX3]], metadata !47, metadata !DIExpression()), !dbg !71
	; CHECK-NEXT: [[T2:%.]] = load i8, i8** [[ARRAYIDX3]], !dbg !72			; CHECK-NEXT: [[T2:%.]] = load i8, i8** [[ARRAYIDX3]], !dbg !72
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[T2]], metadata !48, metadata !DIExpression()), !dbg !72			; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[T2]], metadata !48, metadata !DIExpression()), !dbg !72
	; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]], !dbg !73			; CHECK-NEXT: [[MEMCMP:%.]] = call i32 @memcmp(i8 [[T0]], i8* [[T2]], i64 [[T1_BYTECOUNT]]), !dbg !73
	; CHECK: for.body.i.i:			; CHECK-NEXT: [[T0_VS_T2_EQCMP:%.*]] = icmp eq i32 [[MEMCMP]], 0, !dbg !73
	; CHECK-NEXT: [[__FIRST2_ADDR_07_I_I:%.]] = phi i8 [ [[INCDEC_PTR1_I_I:%.]], [[FOR_INC_I_I:%.]] ], [ [[T2]], [[FOR_BODY_I_I_PREHEADER]] ], !dbg !74			; CHECK-NEXT: br label [[FOR_BODY_I_I_BCMPDISPATCHBB:%.*]]
	; CHECK-NEXT: [[__FIRST1_ADDR_06_I_I:%.]] = phi i8 [ [[INCDEC_PTR_I_I:%.*]], [[FOR_INC_I_I]] ], [ [[T0]], [[FOR_BODY_I_I_PREHEADER]] ], !dbg !75			; CHECK: for.body.i.i.bcmpdispatchbb:
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[__FIRST2_ADDR_07_I_I]], metadata !49, metadata !DIExpression()), !dbg !74			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !49, metadata !DIExpression()), !dbg !74
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[__FIRST1_ADDR_06_I_I]], metadata !50, metadata !DIExpression()), !dbg !75			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !50, metadata !DIExpression()), !dbg !75
	; CHECK-NEXT: [[T3:%.]] = load i8, i8 [[__FIRST1_ADDR_06_I_I]], !dbg !76			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !51, metadata !DIExpression()), !dbg !76
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[T3]], metadata !51, metadata !DIExpression()), !dbg !76			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !52, metadata !DIExpression()), !dbg !77
	; CHECK-NEXT: [[T4:%.]] = load i8, i8 [[__FIRST2_ADDR_07_I_I]], !dbg !77			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !53, metadata !DIExpression()), !dbg !73
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[T4]], metadata !52, metadata !DIExpression()), !dbg !77			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !54, metadata !DIExpression()), !dbg !78
	; CHECK-NEXT: [[CMP_I_I_I:%.*]] = icmp eq i8 [[T3]], [[T4]], !dbg !78			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !55, metadata !DIExpression()), !dbg !79
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP_I_I_I]], metadata !53, metadata !DIExpression()), !dbg !78			; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata !56, metadata !DIExpression()), !dbg !80
	; CHECK-NEXT: br i1 [[CMP_I_I_I]], label [[FOR_INC_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]], !dbg !79			; CHECK-NEXT: br i1 [[T0_VS_T2_EQCMP]], label [[T0_VS_T2_EQCMP_EQUALBB:%.]], label [[T0_VS_T2_EQCMP_UNEQUALBB:%.]], !dbg !73
	; CHECK: for.inc.i.i:			; CHECK: t0.vs.t2.eqcmp.equalbb:
	; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds i8, i8* [[__FIRST1_ADDR_06_I_I]], i64 1, !dbg !80			; CHECK-NEXT: br i1 true, label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT:%.*]], label [[FOR_BODY_I_I_BCMPDISPATCHBB]], !dbg !81
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[INCDEC_PTR_I_I]], metadata !54, metadata !DIExpression()), !dbg !80			; CHECK: t0.vs.t2.eqcmp.unequalbb:
	; CHECK-NEXT: [[INCDEC_PTR1_I_I]] = getelementptr inbounds i8, i8* [[__FIRST2_ADDR_07_I_I]], i64 1, !dbg !81			; CHECK-NEXT: br i1 true, label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I_BCMPDISPATCHBB]], !dbg !82
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i8* [[INCDEC_PTR1_I_I]], metadata !55, metadata !DIExpression()), !dbg !81
	; CHECK-NEXT: [[CMP_I_I:%.]] = icmp eq i8 [[INCDEC_PTR_I_I]], [[ADD_PTR]], !dbg !82
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP_I_I]], metadata !56, metadata !DIExpression()), !dbg !82
	; CHECK-NEXT: br i1 [[CMP_I_I]], label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]], label [[FOR_BODY_I_I]], !dbg !83
	; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:			; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit.loopexit:
	; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[FOR_BODY_I_I]] ], [ true, [[FOR_INC_I_I]] ]			; CHECK-NEXT: [[RETVAL_0_I_I_PH:%.*]] = phi i1 [ false, [[T0_VS_T2_EQCMP_UNEQUALBB]] ], [ true, [[T0_VS_T2_EQCMP_EQUALBB]] ]
	; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], !dbg !84			; CHECK-NEXT: br label [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT]], !dbg !83
	; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:			; CHECK: _ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit:
	; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[FOR_BODY]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ], !dbg !85			; CHECK-NEXT: [[RETVAL_0_I_I:%.*]] = phi i1 [ true, [[FOR_BODY]] ], [ [[RETVAL_0_I_I_PH]], [[_ZNST3__15EQUALIPKCS2_EEBT_S3_T0__EXIT_LOOPEXIT]] ], !dbg !84
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[RETVAL_0_I_I]], metadata !57, metadata !DIExpression()), !dbg !85			; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[RETVAL_0_I_I]], metadata !57, metadata !DIExpression()), !dbg !84
	; CHECK-NEXT: tail call void @_Z4sinkb(i1 [[RETVAL_0_I_I]]), !dbg !84			; CHECK-NEXT: tail call void @_Z4sinkb(i1 [[RETVAL_0_I_I]]), !dbg !83
	; CHECK-NEXT: [[INC]] = add nuw i64 [[I_012]], 1, !dbg !86			; CHECK-NEXT: [[INC]] = add nuw i64 [[I_012]], 1, !dbg !85
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INC]], metadata !58, metadata !DIExpression()), !dbg !86			; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INC]], metadata !58, metadata !DIExpression()), !dbg !85
	; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INC]], [[OUTER_COUNT]], !dbg !87			; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[INC]], [[OUTER_COUNT]], !dbg !86
	; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !59, metadata !DIExpression()), !dbg !87			; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata !59, metadata !DIExpression()), !dbg !86
	; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg !88			; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg !87
	;			;
	entry:			entry:
	%cmp11 = icmp eq i64 %outer_count, 0			%cmp11 = icmp eq i64 %outer_count, 0
	br i1 %cmp11, label %for.cond.cleanup, label %for.body			br i1 %cmp11, label %for.cond.cleanup, label %for.body

	for.cond.cleanup: ; preds = %_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit, %entry			for.cond.cleanup: ; preds = %_ZNSt3__15equalIPKcS2_EEbT_S3_T0_.exit, %entry
	ret void			ret void

	Show All 37 Lines

llvm/test/Transforms/LoopIdiom/bcmp-widening.ll

	; NOTE: Assertions have been autogenerated by utils/update_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
	; RUN: opt -loop-idiom < %s -S | FileCheck %s			; RUN: opt -loop-idiom -verify -verify-each -verify-dom-info -verify-loop-info < %s -S | FileCheck %s

	target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"			target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

	; We do not necessarily have a loop with comparison of two loaded values.			; We do not necessarily have a loop with comparison of two loaded values.
	; The loop may already be doing bcmp() itself. We just need to widen it.			; The loop may already be doing bcmp() itself. We just need to widen it.

	; FIXME: -memcpyopt does not promote memcmp() source into memcmp() / bcmp().			; FIXME: -memcpyopt does not promote memcmp() source into memcmp() / bcmp().

	▲ Show 20 Lines • Show All 127 Lines • Show Last 20 Lines