This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
lib/Target/X86/
-
Target/
-
X86/
2/4
X86ISelLowering.cpp
-
test/CodeGen/X86/
-
CodeGen/
-
X86/
-
8bit_cmov_of_trunc_promotion.ll
3/3
cmov-promotion.ll
-
cmov.ll
-
cmovcmov.ll
1
copy-eflags.ll
-
fast-isel-select-pseudo-cmov.ll
-
fshl.ll
-
fshr.ll
-
i386-shrink-wrapping.ll
-
midpoint-int.ll
-
pr5145.ll
-
sadd_sat.ll
-
sadd_sat_vec.ll
-
sat-add.ll
-
select.ll
-
select_const.ll
-
ssub_sat.ll
-
ssub_sat_vec.ll
-
uadd_sat.ll
-
uadd_sat_vec.ll
-
usub_sat.ll
-
usub_sat_vec.ll
-
x86-shrink-wrapping.ll

Differential D59035

[X86] Promote i8 CMOV's (PR40965)
ClosedPublic

Authored by lebedev.ri on Mar 6 2019, 11:36 AM.

Download Raw Diff

Details

Reviewers

craig.topper
RKSimon
spatel
andreadb
nikic
mclow.lists

Commits

rGb6e376ddfa17: [X86] Promote i8 CMOV's (PR40965)
rL356300: [X86] Promote i8 CMOV's (PR40965)

Summary

@mclow.lists brought this issue up in IRC; it came up during
the implementation of libc++ std::midpoint() (D59099)
https://godbolt.org/z/oLrHBP

Currently LLVM X86 backend only promotes i8 CMOV if it came from 2x`trunc`.
This differential proposes to always promote i8 CMOV.

There are several concerns here:

Is this actually more performant, or is it just the ASM that looks cuter?
Does this result in partial register stalls?
What about branch predictor?

Indeed, performance should be the main point here.

Let's look at a simple microbenchmark:

llvm-cmov-bench.cc4 KBDownload

#include "benchmark/benchmark.h"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iterator>
#include <limits>
#include <random>
#include <type_traits>
#include <utility>
#include <vector>

// Future preliminary libc++ code, from Marshall Clow.
//
// Returns the point halfway between __a and __b. Half of the distance is
// rounded down and then applied starting from __a in the direction of __b, so
// an odd distance yields the candidate nearer to __a.
// NOTE(review): declared directly inside namespace std because it stands in
// for the future library function; expect a clash with the standard
// std::midpoint on C++20 toolchains -- confirm before reusing.
namespace std {
template <class _Tp>
__inline _Tp midpoint(_Tp __a, _Tp __b) noexcept {
  // Unsigned counterpart of _Tp (cv-qualifiers stripped); the distance between
  // the operands is stored in this type.
  using _Up = typename std::make_unsigned<typename remove_cv<_Tp>::type>::type;

  // Default assumption: __a <= __b, so we step upwards from __a.
  int __sign = 1;
  _Up __m = __a;  // smaller operand
  _Up __M = __b;  // larger operand
  if (__a > __b) {
    // Operands are in descending order: swap the roles and step downwards.
    __sign = -1;
    __m = __b;
    __M = __a;
  }
  // Halve the (unsigned) distance, then move that far from __a towards __b.
  return __a + __sign * _Tp(_Up(__M - __m) >> 1);
}
}  // namespace std

// Returns `count` values drawn uniformly from the whole range of T
// ([numeric_limits<T>::min(), numeric_limits<T>::max()]), generated by a
// Mersenne Twister that is seeded from std::random_device.
//
// std::uniform_int_distribution is only specified for short, int, long,
// long long and their unsigned forms. Instantiating it with an 8-bit type is
// undefined behaviour and is rejected outright by some standard libraries, so
// types narrower than short are sampled through int / unsigned and the result
// is cast back to T. The requested bounds are still those of T, so the cast
// never truncates.
template <typename T>
std::vector<T> getVectorOfRandomNumbers(size_t count) {
  using WideT =
      typename std::conditional<std::is_signed<T>::value, int, unsigned>::type;
  using DistT =
      typename std::conditional<(sizeof(T) < sizeof(short)), WideT, T>::type;

  std::random_device rd;
  std::mt19937 gen(rd());
  std::uniform_int_distribution<DistT> dis(std::numeric_limits<T>::min(),
                                           std::numeric_limits<T>::max());
  std::vector<T> v;
  v.reserve(count);  // single allocation; generate_n appends exactly `count`
  std::generate_n(std::back_inserter(v), count,
                  [&dis, &gen]() { return static_cast<T>(dis(gen)); });
  assert(v.size() == count);
  return v;
}

// Input generator: both operand arrays are filled with random values.
struct RandRand {
  // Produces a pair of independent random vectors, each `count` elements long.
  template <typename T>
  static std::pair<std::vector<T>, std::vector<T>> Gen(size_t count) {
    std::vector<T> lhs = getVectorOfRandomNumbers<T>(count);
    std::vector<T> rhs = getVectorOfRandomNumbers<T>(count);
    return {std::move(lhs), std::move(rhs)};
  }
};
// Input generator: the first operand array is all zeros, the second is random.
struct ZeroRand {
  // Produces `count` zeros paired with `count` random values.
  template <typename T>
  static std::pair<std::vector<T>, std::vector<T>> Gen(size_t count) {
    std::vector<T> zeros(count, T(0));
    std::vector<T> randoms = getVectorOfRandomNumbers<T>(count);
    return {std::move(zeros), std::move(randoms)};
  }
};

// Benchmark body: computes std::midpoint() element-wise over two arrays of
// state.range(0) elements of type T. The arrays come from the generator policy
// `Gen` and are built once, before the timed loop starts.
template <class T, class Gen>
void BM_StdMidpoint(benchmark::State& state) {
  using benchmark::Counter;

  const size_t Length = state.range(0);

  const auto Inputs = Gen::template Gen<T>(Length);
  const std::vector<T>& a = Inputs.first;
  const std::vector<T>& b = Inputs.second;
  assert(a.size() == Length && b.size() == a.size());

  // Hide the inputs from the optimizer so that nothing gets precomputed.
  benchmark::ClobberMemory();
  benchmark::DoNotOptimize(a);
  benchmark::DoNotOptimize(a.data());
  benchmark::DoNotOptimize(b);
  benchmark::DoNotOptimize(b.data());

  for (auto _ : state) {
    for (size_t idx = 0; idx < Length; ++idx) {
      const auto calculated = std::midpoint(a[idx], b[idx]);
      benchmark::DoNotOptimize(calculated);
    }
  }

  state.SetComplexityN(Length);

  // Number of midpoints computed, in total and per second.
  state.counters["midpoints"] =
      Counter(Length, Counter::kIsIterationInvariant);
  state.counters["midpoints/sec"] =
      Counter(Length, Counter::kIsIterationInvariantRate);

  // Each midpoint reads one element from each of the two arrays.
  const size_t BytesRead = 2 * sizeof(T) * Length;
  state.counters["bytes_read/iteration"] =
      Counter(BytesRead, Counter::kDefaults, Counter::OneK::kIs1024);
  state.counters["bytes_read/sec"] =
      Counter(BytesRead, Counter::kIsIterationInvariantRate,
              Counter::OneK::kIs1024);
}

// Configures the sweep of array lengths for a benchmark over element type T:
// lengths grow by a factor of 2 from 1 up to MaxLen, and linear complexity is
// requested for the fitted big-O report.
template <typename T>
static void CustomArguments(benchmark::internal::Benchmark* b) {
  const size_t L2SizeBytes = 2 * 1024 * 1024;
  const size_t TotalBufs = 2;     // the two operand arrays
  const size_t SafetyMargin = 2;  // use only half of the remaining budget

  // What is the largest range we can check to always fit within given L2 cache?
  const size_t BytesPerBuf = L2SizeBytes / TotalBufs;
  const size_t EltsPerBuf = BytesPerBuf / sizeof(T);
  const size_t MaxLen = EltsPerBuf / SafetyMargin;

  b->RangeMultiplier(2);
  b->Range(1, MaxLen);
  b->Complexity(benchmark::oN);
}

// RandRand: both operands are random, so the `__a > __b` comparison inside
// std::midpoint() is unpredictable. Registered for every signed and unsigned
// width from 8 to 64 bits.
BENCHMARK_TEMPLATE(BM_StdMidpoint, int32_t, RandRand)
    ->Apply(CustomArguments<int32_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint32_t, RandRand)
    ->Apply(CustomArguments<uint32_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, int64_t, RandRand)
    ->Apply(CustomArguments<int64_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint64_t, RandRand)
    ->Apply(CustomArguments<uint64_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, int16_t, RandRand)
    ->Apply(CustomArguments<int16_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint16_t, RandRand)
    ->Apply(CustomArguments<uint16_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, int8_t, RandRand)
    ->Apply(CustomArguments<int8_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint8_t, RandRand)
    ->Apply(CustomArguments<uint8_t>);

// ZeroRand: the first operand is always zero and the second is greater than
// or equal to zero, so the comparison always goes the same way and is
// predictable. Only unsigned types are registered here, since a signed random
// operand could be negative.
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint32_t, ZeroRand)
    ->Apply(CustomArguments<uint32_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint64_t, ZeroRand)
    ->Apply(CustomArguments<uint64_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint16_t, ZeroRand)
    ->Apply(CustomArguments<uint16_t>);
BENCHMARK_TEMPLATE(BM_StdMidpoint, uint8_t, ZeroRand)
    ->Apply(CustomArguments<uint8_t>);

$ ~/src/googlebenchmark/tools/compare.py --no-utest benchmarks ./llvm-cmov-bench-OLD ./llvm-cmov-bench-NEW
RUNNING: ./llvm-cmov-bench-OLD --benchmark_out=/tmp/tmp5a5qjm
2019-03-06 21:53:31
Running ./llvm-cmov-bench-OLD
Run on (8 X 4000 MHz CPU s)
CPU Caches:
  L1 Data 16K (x8)
  L1 Instruction 64K (x4)
  L2 Unified 2048K (x4)
  L3 Unified 8192K (x1)
Load Average: 1.78, 1.81, 1.36
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   Iterations UserCounters<...>
----------------------------------------------------------------------------------------------------
<...>
BM_StdMidpoint<int32_t, RandRand>/131072      300398 ns       300404 ns         2330 bytes_read/iteration=1024k bytes_read/sec=3.25083G/s midpoints=305.398M midpoints/sec=436.319M/s
BM_StdMidpoint<int32_t, RandRand>_BigO          2.29 N          2.29 N
BM_StdMidpoint<int32_t, RandRand>_RMS              2 %             2 %
<...>
BM_StdMidpoint<uint32_t, RandRand>/131072     300433 ns       300433 ns         2330 bytes_read/iteration=1024k bytes_read/sec=3.25052G/s midpoints=305.398M midpoints/sec=436.278M/s
BM_StdMidpoint<uint32_t, RandRand>_BigO         2.29 N          2.29 N
BM_StdMidpoint<uint32_t, RandRand>_RMS             2 %             2 %
<...>
BM_StdMidpoint<int64_t, RandRand>/65536       169857 ns       169858 ns         4121 bytes_read/iteration=1024k bytes_read/sec=5.74929G/s midpoints=270.074M midpoints/sec=385.828M/s
BM_StdMidpoint<int64_t, RandRand>_BigO          2.59 N          2.59 N
BM_StdMidpoint<int64_t, RandRand>_RMS              3 %             3 %
<...>
BM_StdMidpoint<uint64_t, RandRand>/65536      169770 ns       169771 ns         4125 bytes_read/iteration=1024k bytes_read/sec=5.75223G/s midpoints=270.336M midpoints/sec=386.026M/s
BM_StdMidpoint<uint64_t, RandRand>_BigO         2.59 N          2.59 N
BM_StdMidpoint<uint64_t, RandRand>_RMS             3 %             3 %
<...>
BM_StdMidpoint<int16_t, RandRand>/262144      591169 ns       591179 ns         1182 bytes_read/iteration=1024k bytes_read/sec=1.65189G/s midpoints=309.854M midpoints/sec=443.426M/s
BM_StdMidpoint<int16_t, RandRand>_BigO          2.25 N          2.25 N
BM_StdMidpoint<int16_t, RandRand>_RMS              1 %             1 %
<...>
BM_StdMidpoint<uint16_t, RandRand>/262144     591264 ns       591274 ns         1184 bytes_read/iteration=1024k bytes_read/sec=1.65162G/s midpoints=310.378M midpoints/sec=443.354M/s
BM_StdMidpoint<uint16_t, RandRand>_BigO         2.25 N          2.25 N
BM_StdMidpoint<uint16_t, RandRand>_RMS             1 %             1 %
<...>
BM_StdMidpoint<int8_t, RandRand>/524288      2983669 ns      2983689 ns          235 bytes_read/iteration=1024k bytes_read/sec=335.156M/s midpoints=123.208M midpoints/sec=175.718M/s
BM_StdMidpoint<int8_t, RandRand>_BigO           5.69 N          5.69 N
BM_StdMidpoint<int8_t, RandRand>_RMS               0 %             0 %
<...>
BM_StdMidpoint<uint8_t, RandRand>/524288     2668398 ns      2668419 ns          262 bytes_read/iteration=1024k bytes_read/sec=374.754M/s midpoints=137.363M midpoints/sec=196.479M/s
BM_StdMidpoint<uint8_t, RandRand>_BigO          5.09 N          5.09 N
BM_StdMidpoint<uint8_t, RandRand>_RMS              0 %             0 %
<...>
BM_StdMidpoint<uint32_t, ZeroRand>/131072     300887 ns       300887 ns         2331 bytes_read/iteration=1024k bytes_read/sec=3.24561G/s midpoints=305.529M midpoints/sec=435.619M/s
BM_StdMidpoint<uint32_t, ZeroRand>_BigO         2.29 N          2.29 N
BM_StdMidpoint<uint32_t, ZeroRand>_RMS             2 %             2 %
<...>
BM_StdMidpoint<uint64_t, ZeroRand>/65536      169634 ns       169634 ns         4102 bytes_read/iteration=1024k bytes_read/sec=5.75688G/s midpoints=268.829M midpoints/sec=386.338M/s
BM_StdMidpoint<uint64_t, ZeroRand>_BigO         2.59 N          2.59 N
BM_StdMidpoint<uint64_t, ZeroRand>_RMS             3 %             3 %
<...>
BM_StdMidpoint<uint16_t, ZeroRand>/262144     592252 ns       592255 ns         1182 bytes_read/iteration=1024k bytes_read/sec=1.64889G/s midpoints=309.854M midpoints/sec=442.62M/s
BM_StdMidpoint<uint16_t, ZeroRand>_BigO         2.26 N          2.26 N
BM_StdMidpoint<uint16_t, ZeroRand>_RMS             1 %             1 %
<...>
BM_StdMidpoint<uint8_t, ZeroRand>/524288      987295 ns       987309 ns          711 bytes_read/iteration=1024k bytes_read/sec=1012.85M/s midpoints=372.769M midpoints/sec=531.028M/s
BM_StdMidpoint<uint8_t, ZeroRand>_BigO          1.88 N          1.88 N
BM_StdMidpoint<uint8_t, ZeroRand>_RMS              1 %             1 %
RUNNING: ./llvm-cmov-bench-NEW --benchmark_out=/tmp/tmpPvwpfW
2019-03-06 21:56:58
Running ./llvm-cmov-bench-NEW
Run on (8 X 4000 MHz CPU s)
CPU Caches:
  L1 Data 16K (x8)
  L1 Instruction 64K (x4)
  L2 Unified 2048K (x4)
  L3 Unified 8192K (x1)
Load Average: 1.17, 1.46, 1.30
----------------------------------------------------------------------------------------------------
Benchmark                                          Time             CPU   Iterations UserCounters<...>
----------------------------------------------------------------------------------------------------
<...>
BM_StdMidpoint<int32_t, RandRand>/131072      300878 ns       300880 ns         2324 bytes_read/iteration=1024k bytes_read/sec=3.24569G/s midpoints=304.611M midpoints/sec=435.629M/s
BM_StdMidpoint<int32_t, RandRand>_BigO          2.29 N          2.29 N
BM_StdMidpoint<int32_t, RandRand>_RMS              2 %             2 %
<...>
BM_StdMidpoint<uint32_t, RandRand>/131072     300231 ns       300226 ns         2330 bytes_read/iteration=1024k bytes_read/sec=3.25276G/s midpoints=305.398M midpoints/sec=436.578M/s
BM_StdMidpoint<uint32_t, RandRand>_BigO         2.29 N          2.29 N
BM_StdMidpoint<uint32_t, RandRand>_RMS             2 %             2 %
<...>
BM_StdMidpoint<int64_t, RandRand>/65536       170819 ns       170777 ns         4115 bytes_read/iteration=1024k bytes_read/sec=5.71835G/s midpoints=269.681M midpoints/sec=383.752M/s
BM_StdMidpoint<int64_t, RandRand>_BigO          2.60 N          2.60 N
BM_StdMidpoint<int64_t, RandRand>_RMS              3 %             3 %
<...>
BM_StdMidpoint<uint64_t, RandRand>/65536      171705 ns       171708 ns         4106 bytes_read/iteration=1024k bytes_read/sec=5.68733G/s midpoints=269.091M midpoints/sec=381.671M/s
BM_StdMidpoint<uint64_t, RandRand>_BigO         2.62 N          2.62 N
BM_StdMidpoint<uint64_t, RandRand>_RMS             3 %             3 %
<...>
BM_StdMidpoint<int16_t, RandRand>/262144      592510 ns       592516 ns         1182 bytes_read/iteration=1024k bytes_read/sec=1.64816G/s midpoints=309.854M midpoints/sec=442.425M/s
BM_StdMidpoint<int16_t, RandRand>_BigO          2.26 N          2.26 N
BM_StdMidpoint<int16_t, RandRand>_RMS              1 %             1 %
<...>
BM_StdMidpoint<uint16_t, RandRand>/262144     614823 ns       614823 ns         1180 bytes_read/iteration=1024k bytes_read/sec=1.58836G/s midpoints=309.33M midpoints/sec=426.373M/s
BM_StdMidpoint<uint16_t, RandRand>_BigO         2.33 N          2.33 N
BM_StdMidpoint<uint16_t, RandRand>_RMS             4 %             4 %
<...>
BM_StdMidpoint<int8_t, RandRand>/524288      1073181 ns      1073201 ns          650 bytes_read/iteration=1024k bytes_read/sec=931.791M/s midpoints=340.787M midpoints/sec=488.527M/s
BM_StdMidpoint<int8_t, RandRand>_BigO           2.05 N          2.05 N
BM_StdMidpoint<int8_t, RandRand>_RMS               1 %             1 %
BM_StdMidpoint<uint8_t, RandRand>/524288     1071010 ns      1071020 ns          653 bytes_read/iteration=1024k bytes_read/sec=933.689M/s midpoints=342.36M midpoints/sec=489.522M/s
BM_StdMidpoint<uint8_t, RandRand>_BigO          2.05 N          2.05 N
BM_StdMidpoint<uint8_t, RandRand>_RMS              1 %             1 %
<...>
BM_StdMidpoint<uint32_t, ZeroRand>/131072     300413 ns       300416 ns         2330 bytes_read/iteration=1024k bytes_read/sec=3.2507G/s midpoints=305.398M midpoints/sec=436.302M/s
BM_StdMidpoint<uint32_t, ZeroRand>_BigO         2.29 N          2.29 N
BM_StdMidpoint<uint32_t, ZeroRand>_RMS             2 %             2 %
<...>
BM_StdMidpoint<uint64_t, ZeroRand>/65536      169667 ns       169669 ns         4123 bytes_read/iteration=1024k bytes_read/sec=5.75568G/s midpoints=270.205M midpoints/sec=386.257M/s
BM_StdMidpoint<uint64_t, ZeroRand>_BigO         2.59 N          2.59 N
BM_StdMidpoint<uint64_t, ZeroRand>_RMS             3 %             3 %
<...>
BM_StdMidpoint<uint16_t, ZeroRand>/262144     591396 ns       591404 ns         1184 bytes_read/iteration=1024k bytes_read/sec=1.65126G/s midpoints=310.378M midpoints/sec=443.257M/s
BM_StdMidpoint<uint16_t, ZeroRand>_BigO         2.26 N          2.26 N
BM_StdMidpoint<uint16_t, ZeroRand>_RMS             1 %             1 %
<...>
BM_StdMidpoint<uint8_t, ZeroRand>/524288     1069421 ns      1069413 ns          655 bytes_read/iteration=1024k bytes_read/sec=935.092M/s midpoints=343.409M midpoints/sec=490.258M/s
BM_StdMidpoint<uint8_t, ZeroRand>_BigO          2.04 N          2.04 N
BM_StdMidpoint<uint8_t, ZeroRand>_RMS              0 %             0 %
Comparing ./llvm-cmov-bench-OLD to ./llvm-cmov-bench-NEW
Benchmark                                                   Time             CPU      Time Old      Time New       CPU Old       CPU New
----------------------------------------------------------------------------------------------------------------------------------------
<...>
BM_StdMidpoint<int32_t, RandRand>/131072                 +0.0016         +0.0016        300398        300878        300404        300880
<...>
BM_StdMidpoint<uint32_t, RandRand>/131072                -0.0007         -0.0007        300433        300231        300433        300226
<...>
BM_StdMidpoint<int64_t, RandRand>/65536                  +0.0057         +0.0054        169857        170819        169858        170777
<...>
BM_StdMidpoint<uint64_t, RandRand>/65536                 +0.0114         +0.0114        169770        171705        169771        171708
<...>
BM_StdMidpoint<int16_t, RandRand>/262144                 +0.0023         +0.0023        591169        592510        591179        592516
<...>
BM_StdMidpoint<uint16_t, RandRand>/262144                +0.0398         +0.0398        591264        614823        591274        614823
<...>
BM_StdMidpoint<int8_t, RandRand>/524288                  -0.6403         -0.6403       2983669       1073181       2983689       1073201
<...>
BM_StdMidpoint<uint8_t, RandRand>/524288                 -0.5986         -0.5986       2668398       1071010       2668419       1071020
<...>
BM_StdMidpoint<uint32_t, ZeroRand>/131072                -0.0016         -0.0016        300887        300413        300887        300416
<...>
BM_StdMidpoint<uint64_t, ZeroRand>/65536                 +0.0002         +0.0002        169634        169667        169634        169669
<...>
BM_StdMidpoint<uint16_t, ZeroRand>/262144                -0.0014         -0.0014        592252        591396        592255        591404
<...>
BM_StdMidpoint<uint8_t, ZeroRand>/524288                 +0.0832         +0.0832        987295       1069421        987309       1069413

What can we tell from the benchmark?

BM_StdMidpoint<[u]int8_t, RandRand> indeed has the worst performance.
All BM_StdMidpoint<uint{8,16,32}_t, ZeroRand> cases are performant, even the 8-bit case. That is because there we are computing the midpoint between zero and some random number, thus if the branch predictor is in use, it is in an optimal situation.
Promoting 8-bit CMOV did improve performance of BM_StdMidpoint<[u]int8_t, RandRand>, by -59%..-64%.

What about branch predictor?
BM_StdMidpoint<uint8_t, ZeroRand> was faster than BM_StdMidpoint<uint{16,32,64}_t, ZeroRand>, which may mean that well-predicted branch is better than cmov.
Promoting 8-bit CMOV degraded performance of BM_StdMidpoint<uint8_t, ZeroRand>, cmov is up to +10% worse than well-predicted branch.
However, i do not believe this is a concern. If the branch is well predicted, then the PGO will also say that it is well predicted, and LLVM will happily expand cmov back into branch: https://godbolt.org/z/P5ufig

What about partial register stalls?

I'm not really able to answer that.
What i can say is that if the branch is unpredictable (if it is predictable, then use PGO and you'll have branch)
in ~50% of cases you will have to pay branch misprediction penalty.

$ grep -i MispredictPenalty X86Sched*.td
X86SchedBroadwell.td:  let MispredictPenalty = 16;
X86SchedHaswell.td:  let MispredictPenalty = 16;
X86SchedSandyBridge.td:  let MispredictPenalty = 16;
X86SchedSkylakeClient.td:  let MispredictPenalty = 14;
X86SchedSkylakeServer.td:  let MispredictPenalty = 14;
X86ScheduleBdVer2.td:  let MispredictPenalty = 20; // Minimum branch misdirection penalty.
X86ScheduleBtVer2.td:  let MispredictPenalty = 14; // Minimum branch misdirection penalty
X86ScheduleSLM.td:  let MispredictPenalty = 10;
X86ScheduleZnver1.td:  let MispredictPenalty = 17;

.. which can be as small as 10 cycles and as large as 20 cycles.
Partial register stalls do not seem to be an issue for AMD CPU's.
For intel CPU's, they should be around ~5 cycles?
Is that actually an issue here? I'm not sure.

In short, i'd say this is an improvement, at least on this microbenchmark.

Fixes PR40965.

Diff Detail

Repository: rL LLVM

Event Timeline

lebedev.ri created this revision.Mar 6 2019, 11:36 AM

Herald added subscribers: jdoerfert, jfb. · View Herald TranscriptMar 6 2019, 11:36 AM

lebedev.ri added a parent revision: D59001: X86TargetLowering::LowerSELECT(): don't promote CMOV's if the subtarget does't have them.Mar 6 2019, 11:36 AM

lebedev.ri added a project: Restricted Project.Mar 6 2019, 11:53 AM

Don't introduce Subtarget.hasCMov() requirement for i16 CMOV promotion.
Do require Subtarget.hasCMov() for i8 CMOV promotion. We should not have that limitation, but EmitLoweredSelect() can not deal with these extensions being inserted between two CMOV's. That e.g. breaks pseudo_cmov_lower.ll test. PR40974

lebedev.ri removed a parent revision: D59001: X86TargetLowering::LowerSELECT(): don't promote CMOV's if the subtarget does't have them.Mar 7 2019, 10:01 AM

lebedev.ri mentioned this in D59001: X86TargetLowering::LowerSELECT(): don't promote CMOV's if the subtarget does't have them.Mar 7 2019, 10:05 AM

Actually upload the right patch.

lebedev.ri mentioned this in D59147: Broken, not for review: X86TargetLowering::EmitLoweredSelect(): ignore harmless instrs between two PHI's.Mar 8 2019, 11:53 AM

lebedev.ri added inline comments.

lib/Target/X86/X86ISelLowering.cpp

20550–20558

I have looked into this a bit more, hacked together a patch D59147 that resolves the regression seen in D59001,
but it is unable to resolve the problem illustrated by possible pseudo_cmov_lower.ll, @foo9 regression.

bb.2.entry:
; predecessors: %bb.0, %bb.1
  successors: %bb.3(0x40000000), %bb.4(0x40000000); %bb.3(50.00%), %bb.4(50.00%)
  liveins: $eflags
  %484:gr32 = PHI %483:gr32, %bb.1, %336:gr32, %bb.0    // <- base PHI
  %485:gr32_abcd = COPY %484:gr32
  %486:gr8 = COPY %485.sub_8bit:gr32_abcd
  %487:gr32 = MOVZX32rr8 killed %368:gr8                // <- def %487:gr32
  JA_1 %bb.4, implicit $eflags

bb.3.entry:
; predecessors: %bb.2
  successors: %bb.4(0x80000000); %bb.4(100.00%)
  liveins: $eflags

bb.4.entry:
; predecessors: %bb.2, %bb.3
  successors: %bb.5(0x40000000), %bb.6(0x40000000); %bb.5(50.00%), %bb.6(50.00%)
  liveins: $eflags
  %488:gr32 = PHI %487:gr32, %bb.3, %339:gr32, %bb.2    // <- second PHI of chain // <- use %487:gr32
  %489:gr32_abcd = COPY %488:gr32
  %490:gr8 = COPY %489.sub_8bit:gr32_abcd
  %491:gr32 = MOVZX32rr8 killed %367:gr8
  JA_1 %bb.6, implicit $eflags

%488:gr32 = PHI uses %487:gr32 which is defined *after* the original %484:gr32 = PHI.
Some extra sinking/hoisting would be required.
Or, like @craig.topper mentioned in IRC, just promote all the i8's!!!1 :)

TLDR: if this Subtarget.hasCMov() limitation here is a problem, please advise how to proceed.

craig.topper added inline comments.Mar 14 2019, 2:39 PM

lib/Target/X86/X86ISelLowering.cpp
20552	"deail" -> deal
20557	The MayFoldLoad restriction really only applies to i16.

Address nits.

lebedev.ri added inline comments.Mar 15 2019, 1:45 AM

lib/Target/X86/X86ISelLowering.cpp
20552	Oops, i have noticed that when submitting the original patch, but did not want to update right away, and it stuck..

Looks good to me.

test/CodeGen/X86/cmov-promotion.ll
155–159	I noticed that we could avoid the sign extend if we move -19 instead of 237 to ECX, and we commute the operands of that CMOV (along with the condition: from NE to E). The following sequence should be equivalent: ; CMOV-NEXT: movl $117, %eax ; CMOV-NEXT: movl $-19, %ecx ; CMOV-NEXT: cmovel %ecx, %eax Same for other 'spromotion' tests below. P.s.: none of these things require changes to your patch. This was just FYI (something that I found interesting while looking at the test changes).
test/CodeGen/X86/copy-eflags.ll
257–269	Nice change. It is a shame that we have to repeat the same CMPQ because of the NEGL which modifies FLAGS. In theory, we could reorder that sequence and avoid to repeat the same compare. Again, this may not be that important and it has nothing to do with your patch.

This revision is now accepted and ready to land.Mar 15 2019, 5:22 AM

lebedev.ri added inline comments.Mar 15 2019, 5:59 AM

test/CodeGen/X86/cmov-promotion.ll
155–159	Given that the input is define i16 @old(i1 %c) { %t0 = select i1 %c, i8 117, i8 -19 %ret = sext i8 %t0 to i16 ret i16 %ret } why can't we simply widen the hands of select, like define i16 @new(i1 %c) { %ret = select i1 %c, i16 117, i16 -19 ret i16 %ret } https://rise4fun.com/Alive/cs8 ? So instead of testb $1, %dil movl $117, %eax movl $237, %ecx cmovnel %eax, %ecx movsbl %cl, %eax we get testb $1, %dil movl $117, %ecx movl $65517, %eax # imm = 0xFFED cmovnel %ecx, %eax I.e. the 'obvious' fix here is that if we are widening result of CMOV, and both of the possibilities are constants, then just widen those constants and CMOV itself. Why do you think we'd need to flip the CMOV condition?

lebedev.ri added a child revision: D59412: [X86] X86ISelLowering::combineSextInRegCmov(): also handle i8 CMOV's.Mar 15 2019, 6:44 AM

lebedev.ri marked 3 inline comments as done.

lebedev.ri added inline comments.

test/CodeGen/X86/cmov-promotion.ll
155–159	D59035

why can't we simply widen the hands of select, like

define i16 @new(i1 %c) {
%ret = select i1 %c, i16 117, i16 -19
ret i16 %ret
}

https://rise4fun.com/Alive/cs8
?

I never said that we cannot fix it in a different way. I just wanted to point out a poor codegen issue, and show one possible way to fix it (if you had to do it manually).

If by widening the select operands we fix the issue then great.

To answer to the question:
"Why do you think we'd need to flip the CMOV condition?"

I don't *think* that it is needed if you use a different mapping for registers.
The reason why your solution doesn't require to flip the CMOV condition is only because regalloc swapped the mappings for the moves.

With your approach you get:

movl    $117, %ecx
movl    $65517, %eax            # imm = 0xFFED

In my case, I didn't touch register mappings:

movl    $117, %eax
movl    $-19, %ecx

Since the result has to be moved to EAX, in your case you can simply write

cmovnel %ecx, %eax

While in my case, I had to invert the operands. However, that would also have required changing the condition code from NE to E.

I hope it now makes more sense. But more importantly, it was just meant to show a possible way to fix an issue. I am happy if we can avoid changing that CMOV by simply using different register mappings.

In D59035#1430807, @andreadb wrote:

...

Ahh, i see, that does make sense, thank you!

lebedev.ri mentioned this in D59412: [X86] X86ISelLowering::combineSextInRegCmov(): also handle i8 CMOV's.Mar 15 2019, 1:07 PM

LGTM

In D59035#1431414, @craig.topper wrote:

LGTM

In D59035#1430651, @andreadb wrote:

Looks good to me.

Thank you for the reviews!

Closed by commit rL356300: [X86] Promote i8 CMOV's (PR40965) (authored by lebedevri). · Explain WhyMar 15 2019, 2:20 PM

This revision was automatically updated to reflect the committed changes.

lebedev.ri mentioned this in D65148: [SimplifyCFG] Bump phi-node-folding-threshold from 2 to 3.Sep 4 2019, 3:45 PM

nick added a subscriber: nick.Nov 12 2019, 9:57 AM

nick added inline comments.

llvm/trunk/test/CodeGen/X86/cmov.ll
190 ↗	(On Diff #190899)	The comment should have been removed?

Herald added a reviewer: mclow.lists. · View Herald TranscriptNov 12 2019, 9:57 AM

lebedev.ri marked 2 inline comments as done.Nov 19 2019, 5:25 AM

lebedev.ri added inline comments.

llvm/trunk/test/CodeGen/X86/cmov.ll
190 ↗	(On Diff #190899)	Indeed, fixed in 6de85095ed7d8f161a7f39422d42b5bc2d2dcb98, thanks!

Revision Contents

Path

Size

lib/

Target/

X86/

X86ISelLowering.cpp

14 lines

test/

CodeGen/

X86/

8bit_cmov_of_trunc_promotion.ll

310 lines

57 lines

27 lines

23 lines

37 lines

fast-isel-select-pseudo-cmov.ll

40 lines

fshl.ll

12 lines

fshr.ll

12 lines

i386-shrink-wrapping.ll

38 lines

87 lines

72 lines

32 lines

22 lines

67 lines

33 lines

9 lines

32 lines

22 lines

24 lines

16 lines

19 lines

14 lines

x86-shrink-wrapping.ll

20 lines

Diff 189741

lib/Target/X86/X86ISelLowering.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 20,529 Lines • ▼ Show 20 Lines	if ((CondCode == X86::COND_AE \|\| CondCode == X86::COND_B) &&
return DAG.getNOT(DL, Res, Res.getValueType());		return DAG.getNOT(DL, Res, Res.getValueType());
return Res;		return Res;
}		}
}		}

// X86 doesn't have an i8 cmov. If both operands are the result of a truncate		// X86 doesn't have an i8 cmov. If both operands are the result of a truncate
// widen the cmov and push the truncate through. This avoids introducing a new		// widen the cmov and push the truncate through. This avoids introducing a new
// branch during isel and doesn't add any extensions.		// branch during isel and doesn't add any extensions.
if (Op.getValueType() == MVT::i8 &&		if (Subtarget.hasCMov() && Op1.getOpcode() == ISD::TRUNCATE &&
Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {		Op2.getOpcode() == ISD::TRUNCATE) {
SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);		SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
if (T1.getValueType() == T2.getValueType() &&		if (T1.getValueType() == T2.getValueType() &&
// Blacklist CopyFromReg to avoid partial register stalls.		// Blacklist CopyFromReg to avoid partial register stalls.
T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){		T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,		SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
CC, Cond);		CC, Cond);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);		return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}		}
}		}

// Promote i16 cmovs if it won't prevent folding a load.		// Or finally, promote i8/i16 cmovs if it won't prevent folding a load.
if (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {		// FIXME: we should not limit promotion of i8 case to only when the CMOV is
		// legal, but EmitLoweredSelect() can not deail with these extensions
		craig.topperUnsubmitted Not Done Reply Inline Actions "deail" -> deal craig.topper: "deail" -> deal
		lebedev.riAuthorUnsubmitted Done Reply Inline Actions Oops, i have noticed that when submitting the original patch, but did not want to update right away, and it stuck.. lebedev.ri: Oops, i have noticed that when submitting the original patch, but did not want to update right…
		// being inserted between two CMOV's. (in i16 case too TBN)
		// https://bugs.llvm.org/show_bug.cgi?id=40974
		if (((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) \|\|
		Op.getValueType() == MVT::i16) &&
		!MayFoldLoad(Op1) && !MayFoldLoad(Op2)) {
		craig.topperUnsubmitted Done Reply Inline Actions The MayFoldLoad restriction really only applies to i16. craig.topper: The MayFoldLoad restriction really only applies to i16.
Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);		Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
		lebedev.riAuthorUnsubmitted Not Done Reply Inline Actions I have looked into this a bit more, hacked together a patch D59147 that resolves the regression seen in D59001, but it is unable to resolve the problem illustrated by possible `pseudo_cmov_lower.ll`, `@foo9` regression. bb.2.entry: ; predecessors: %bb.0, %bb.1 successors: %bb.3(0x40000000), %bb.4(0x40000000); %bb.3(50.00%), %bb.4(50.00%) liveins: $eflags %484:gr32 = PHI %483:gr32, %bb.1, %336:gr32, %bb.0 // <- base PHI %485:gr32_abcd = COPY %484:gr32 %486:gr8 = COPY %485.sub_8bit:gr32_abcd %487:gr32 = MOVZX32rr8 killed %368:gr8 // <- def %487:gr32 JA_1 %bb.4, implicit $eflags bb.3.entry: ; predecessors: %bb.2 successors: %bb.4(0x80000000); %bb.4(100.00%) liveins: $eflags bb.4.entry: ; predecessors: %bb.2, %bb.3 successors: %bb.5(0x40000000), %bb.6(0x40000000); %bb.5(50.00%), %bb.6(50.00%) liveins: $eflags %488:gr32 = PHI %487:gr32, %bb.3, %339:gr32, %bb.2 // <- second PHI of chain // <- use %487:gr32 %489:gr32_abcd = COPY %488:gr32 %490:gr8 = COPY %489.sub_8bit:gr32_abcd %491:gr32 = MOVZX32rr8 killed %367:gr8 JA_1 %bb.6, implicit $eflags `%488:gr32 = PHI` uses `%487:gr32` which is defined after the original `%484:gr32 = PHI`. Some extra sinking/hoisting would be required. Or, like @craig.topper mentioned in IRC, just promote all the i8's!!!1 :) TLDR: if this `Subtarget.hasCMov()` limitation here is a problem, please advice how to proceed. lebedev.ri: I have looked into this a bit more, hacked together a patch D59147 that resolves the regression…
Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);		Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
SDValue Ops[] = { Op2, Op1, CC, Cond };		SDValue Ops[] = { Op2, Op1, CC, Cond };
SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);		SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);		return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
}		}

// X86ISD::CMOV means set the result (which is operand 1) to the RHS if		// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.		// condition is true.
▲ Show 20 Lines • Show All 23,041 Lines • Show Last 20 Lines

test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll

Show First 20 Lines • Show All 74 Lines • ▼ Show 20 Lines	; X86_64-NEXT: retq
%t1 = icmp sgt i8 %a1, %a2		%t1 = icmp sgt i8 %a1, %a2
%t2 = select i1 %t1, i8 %a1, i8 %a2		%t2 = select i1 %t1, i8 %a1, i8 %a2
ret i8 %t2		ret i8 %t2
}		}

; Values don't come from regs, but there is only one truncation.		; Values don't come from regs, but there is only one truncation.

define i8 @neg_only_one_truncation(i32 %a1_wide_orig, i8 %a2_orig, i32 %inc) nounwind {		define i8 @neg_only_one_truncation(i32 %a1_wide_orig, i8 %a2_orig, i32 %inc) nounwind {
; I386-LABEL: neg_only_one_truncation:		; I386-NOCMOV-LABEL: neg_only_one_truncation:
; I386: # %bb.0:		; I386-NOCMOV: # %bb.0:
; I386-NEXT: movl {{[0-9]+}}(%esp), %ecx		; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax		; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-NEXT: addl %ecx, %eax		; I386-NOCMOV-NEXT: addl %ecx, %eax
; I386-NEXT: addb {{[0-9]+}}(%esp), %cl		; I386-NOCMOV-NEXT: addb {{[0-9]+}}(%esp), %cl
; I386-NEXT: cmpb %cl, %al		; I386-NOCMOV-NEXT: cmpb %cl, %al
; I386-NEXT: jge .LBB1_2		; I386-NOCMOV-NEXT: jge .LBB1_2
; I386-NEXT: # %bb.1:		; I386-NOCMOV-NEXT: # %bb.1:
; I386-NEXT: movl %ecx, %eax		; I386-NOCMOV-NEXT: movl %ecx, %eax
; I386-NEXT: .LBB1_2:		; I386-NOCMOV-NEXT: .LBB1_2:
; I386-NEXT: # kill: def $al killed $al killed $eax		; I386-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
; I386-NEXT: retl		; I386-NOCMOV-NEXT: retl
;		;
; I686-LABEL: neg_only_one_truncation:		; I386-CMOV-LABEL: neg_only_one_truncation:
; I686: # %bb.0:		; I386-CMOV: # %bb.0:
; I686-NEXT: movl {{[0-9]+}}(%esp), %ecx		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I686-NEXT: movl {{[0-9]+}}(%esp), %eax		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I686-NEXT: addl %ecx, %eax		; I386-CMOV-NEXT: addl %eax, %ecx
; I686-NEXT: addb {{[0-9]+}}(%esp), %cl		; I386-CMOV-NEXT: addb {{[0-9]+}}(%esp), %al
; I686-NEXT: cmpb %cl, %al		; I386-CMOV-NEXT: cmpb %al, %cl
; I686-NEXT: jge .LBB1_2		; I386-CMOV-NEXT: movzbl %al, %eax
; I686-NEXT: # %bb.1:		; I386-CMOV-NEXT: cmovgel %ecx, %eax
; I686-NEXT: movl %ecx, %eax		; I386-CMOV-NEXT: # kill: def $al killed $al killed $eax
; I686-NEXT: .LBB1_2:		; I386-CMOV-NEXT: retl
; I686-NEXT: # kill: def $al killed $al killed $eax		;
; I686-NEXT: retl		; I686-NOCMOV-LABEL: neg_only_one_truncation:
		; I686-NOCMOV: # %bb.0:
		; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; I686-NOCMOV-NEXT: addl %ecx, %eax
		; I686-NOCMOV-NEXT: addb {{[0-9]+}}(%esp), %cl
		; I686-NOCMOV-NEXT: cmpb %cl, %al
		; I686-NOCMOV-NEXT: jge .LBB1_2
		; I686-NOCMOV-NEXT: # %bb.1:
		; I686-NOCMOV-NEXT: movl %ecx, %eax
		; I686-NOCMOV-NEXT: .LBB1_2:
		; I686-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
		; I686-NOCMOV-NEXT: retl
		;
		; I686-CMOV-LABEL: neg_only_one_truncation:
		; I686-CMOV: # %bb.0:
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-CMOV-NEXT: addl %eax, %ecx
		; I686-CMOV-NEXT: addb {{[0-9]+}}(%esp), %al
		; I686-CMOV-NEXT: cmpb %al, %cl
		; I686-CMOV-NEXT: movzbl %al, %eax
		; I686-CMOV-NEXT: cmovgel %ecx, %eax
		; I686-CMOV-NEXT: # kill: def $al killed $al killed $eax
		; I686-CMOV-NEXT: retl
;		;
; X86_64-LABEL: neg_only_one_truncation:		; X86_64-LABEL: neg_only_one_truncation:
; X86_64: # %bb.0:		; X86_64: # %bb.0:
; X86_64-NEXT: movl %edi, %eax		; X86_64-NEXT: addl %edx, %edi
; X86_64-NEXT: addl %edx, %eax
; X86_64-NEXT: addb %sil, %dl		; X86_64-NEXT: addb %sil, %dl
; X86_64-NEXT: cmpb %dl, %al		; X86_64-NEXT: cmpb %dl, %dil
; X86_64-NEXT: jge .LBB1_2		; X86_64-NEXT: movzbl %dl, %eax
; X86_64-NEXT: # %bb.1:		; X86_64-NEXT: cmovgel %edi, %eax
; X86_64-NEXT: movl %edx, %eax
; X86_64-NEXT: .LBB1_2:
; X86_64-NEXT: # kill: def $al killed $al killed $eax		; X86_64-NEXT: # kill: def $al killed $al killed $eax
; X86_64-NEXT: retq		; X86_64-NEXT: retq
%a1_wide = add i32 %a1_wide_orig, %inc		%a1_wide = add i32 %a1_wide_orig, %inc
%inc_short = trunc i32 %inc to i8		%inc_short = trunc i32 %inc to i8
%a2 = add i8 %a2_orig, %inc_short		%a2 = add i8 %a2_orig, %inc_short
%a1 = trunc i32 %a1_wide to i8		%a1 = trunc i32 %a1_wide to i8
%t1 = icmp sgt i8 %a1, %a2		%t1 = icmp sgt i8 %a1, %a2
%t2 = select i1 %t1, i8 %a1, i8 %a2		%t2 = select i1 %t1, i8 %a1, i8 %a2
ret i8 %t2		ret i8 %t2
}		}

; Values don't come from regs, but truncation from different types.		; Values don't come from regs, but truncation from different types.

define i8 @neg_type_mismatch(i32 %a1_wide_orig, i16 %a2_wide_orig, i32 %inc) nounwind {		define i8 @neg_type_mismatch(i32 %a1_wide_orig, i16 %a2_wide_orig, i32 %inc) nounwind {
; I386-LABEL: neg_type_mismatch:		; I386-NOCMOV-LABEL: neg_type_mismatch:
; I386: # %bb.0:		; I386-NOCMOV: # %bb.0:
; I386-NEXT: movl {{[0-9]+}}(%esp), %ecx		; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I386-NEXT: movl {{[0-9]+}}(%esp), %eax		; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I386-NEXT: addl %ecx, %eax		; I386-NOCMOV-NEXT: addl %ecx, %eax
; I386-NEXT: addw {{[0-9]+}}(%esp), %cx		; I386-NOCMOV-NEXT: addw {{[0-9]+}}(%esp), %cx
; I386-NEXT: cmpb %cl, %al		; I386-NOCMOV-NEXT: cmpb %cl, %al
; I386-NEXT: jge .LBB2_2		; I386-NOCMOV-NEXT: jge .LBB2_2
; I386-NEXT: # %bb.1:		; I386-NOCMOV-NEXT: # %bb.1:
; I386-NEXT: movl %ecx, %eax		; I386-NOCMOV-NEXT: movl %ecx, %eax
; I386-NEXT: .LBB2_2:		; I386-NOCMOV-NEXT: .LBB2_2:
; I386-NEXT: # kill: def $al killed $al killed $eax		; I386-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
; I386-NEXT: retl		; I386-NOCMOV-NEXT: retl
;		;
; I686-LABEL: neg_type_mismatch:		; I386-CMOV-LABEL: neg_type_mismatch:
; I686: # %bb.0:		; I386-CMOV: # %bb.0:
; I686-NEXT: movl {{[0-9]+}}(%esp), %ecx		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I686-NEXT: movl {{[0-9]+}}(%esp), %eax		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I686-NEXT: addl %ecx, %eax		; I386-CMOV-NEXT: addl %eax, %ecx
; I686-NEXT: addw {{[0-9]+}}(%esp), %cx		; I386-CMOV-NEXT: addw {{[0-9]+}}(%esp), %ax
; I686-NEXT: cmpb %cl, %al		; I386-CMOV-NEXT: cmpb %al, %cl
; I686-NEXT: jge .LBB2_2		; I386-CMOV-NEXT: cmovgel %ecx, %eax
; I686-NEXT: # %bb.1:		; I386-CMOV-NEXT: # kill: def $al killed $al killed $eax
; I686-NEXT: movl %ecx, %eax		; I386-CMOV-NEXT: retl
; I686-NEXT: .LBB2_2:		;
; I686-NEXT: # kill: def $al killed $al killed $eax		; I686-NOCMOV-LABEL: neg_type_mismatch:
; I686-NEXT: retl		; I686-NOCMOV: # %bb.0:
		; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; I686-NOCMOV-NEXT: addl %ecx, %eax
		; I686-NOCMOV-NEXT: addw {{[0-9]+}}(%esp), %cx
		; I686-NOCMOV-NEXT: cmpb %cl, %al
		; I686-NOCMOV-NEXT: jge .LBB2_2
		; I686-NOCMOV-NEXT: # %bb.1:
		; I686-NOCMOV-NEXT: movl %ecx, %eax
		; I686-NOCMOV-NEXT: .LBB2_2:
		; I686-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
		; I686-NOCMOV-NEXT: retl
		;
		; I686-CMOV-LABEL: neg_type_mismatch:
		; I686-CMOV: # %bb.0:
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-CMOV-NEXT: addl %eax, %ecx
		; I686-CMOV-NEXT: addw {{[0-9]+}}(%esp), %ax
		; I686-CMOV-NEXT: cmpb %al, %cl
		; I686-CMOV-NEXT: cmovgel %ecx, %eax
		; I686-CMOV-NEXT: # kill: def $al killed $al killed $eax
		; I686-CMOV-NEXT: retl
;		;
; X86_64-LABEL: neg_type_mismatch:		; X86_64-LABEL: neg_type_mismatch:
; X86_64: # %bb.0:		; X86_64: # %bb.0:
; X86_64-NEXT: movl %edi, %eax
; X86_64-NEXT: addl %edx, %eax
; X86_64-NEXT: addl %edx, %esi
; X86_64-NEXT: cmpb %sil, %al
; X86_64-NEXT: jge .LBB2_2
; X86_64-NEXT: # %bb.1:
; X86_64-NEXT: movl %esi, %eax		; X86_64-NEXT: movl %esi, %eax
; X86_64-NEXT: .LBB2_2:		; X86_64-NEXT: addl %edx, %edi
		; X86_64-NEXT: addl %edx, %eax
		; X86_64-NEXT: cmpb %al, %dil
		; X86_64-NEXT: cmovgel %edi, %eax
; X86_64-NEXT: # kill: def $al killed $al killed $eax		; X86_64-NEXT: # kill: def $al killed $al killed $eax
; X86_64-NEXT: retq		; X86_64-NEXT: retq
%a1_wide = add i32 %a1_wide_orig, %inc		%a1_wide = add i32 %a1_wide_orig, %inc
%inc_short = trunc i32 %inc to i16		%inc_short = trunc i32 %inc to i16
%a2_wide = add i16 %a2_wide_orig, %inc_short		%a2_wide = add i16 %a2_wide_orig, %inc_short
%a1 = trunc i32 %a1_wide to i8		%a1 = trunc i32 %a1_wide to i8
%a2 = trunc i16 %a2_wide to i8		%a2 = trunc i16 %a2_wide to i8
%t1 = icmp sgt i8 %a1, %a2		%t1 = icmp sgt i8 %a1, %a2
%t2 = select i1 %t1, i8 %a1, i8 %a2		%t2 = select i1 %t1, i8 %a1, i8 %a2
ret i8 %t2		ret i8 %t2
}		}

; One value comes from regs		; One value comes from regs

define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind {		define i8 @negative_CopyFromReg(i32 %a1_wide, i32 %a2_wide_orig, i32 %inc) nounwind {
; I386-LABEL: negative_CopyFromReg:		; I386-NOCMOV-LABEL: negative_CopyFromReg:
; I386: # %bb.0:		; I386-NOCMOV: # %bb.0:
; I386-NEXT: movb {{[0-9]+}}(%esp), %al		; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
; I386-NEXT: movl {{[0-9]+}}(%esp), %ecx		; I386-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I386-NEXT: addl {{[0-9]+}}(%esp), %ecx		; I386-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
; I386-NEXT: cmpb %cl, %al		; I386-NOCMOV-NEXT: cmpb %cl, %al
; I386-NEXT: jge .LBB3_2		; I386-NOCMOV-NEXT: jge .LBB3_2
; I386-NEXT: # %bb.1:		; I386-NOCMOV-NEXT: # %bb.1:
; I386-NEXT: movl %ecx, %eax		; I386-NOCMOV-NEXT: movl %ecx, %eax
; I386-NEXT: .LBB3_2:		; I386-NOCMOV-NEXT: .LBB3_2:
; I386-NEXT: retl		; I386-NOCMOV-NEXT: retl
;		;
; I686-LABEL: negative_CopyFromReg:		; I386-CMOV-LABEL: negative_CopyFromReg:
; I686: # %bb.0:		; I386-CMOV: # %bb.0:
; I686-NEXT: movb {{[0-9]+}}(%esp), %al		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I686-NEXT: movl {{[0-9]+}}(%esp), %ecx		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I686-NEXT: addl {{[0-9]+}}(%esp), %ecx		; I386-CMOV-NEXT: addl {{[0-9]+}}(%esp), %eax
; I686-NEXT: cmpb %cl, %al		; I386-CMOV-NEXT: cmpb %al, %cl
; I686-NEXT: jge .LBB3_2		; I386-CMOV-NEXT: cmovgel %ecx, %eax
; I686-NEXT: # %bb.1:		; I386-CMOV-NEXT: # kill: def $al killed $al killed $eax
; I686-NEXT: movl %ecx, %eax		; I386-CMOV-NEXT: retl
; I686-NEXT: .LBB3_2:		;
; I686-NEXT: retl		; I686-NOCMOV-LABEL: negative_CopyFromReg:
		; I686-NOCMOV: # %bb.0:
		; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
		; I686-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-NOCMOV-NEXT: addl {{[0-9]+}}(%esp), %ecx
		; I686-NOCMOV-NEXT: cmpb %cl, %al
		; I686-NOCMOV-NEXT: jge .LBB3_2
		; I686-NOCMOV-NEXT: # %bb.1:
		; I686-NOCMOV-NEXT: movl %ecx, %eax
		; I686-NOCMOV-NEXT: .LBB3_2:
		; I686-NOCMOV-NEXT: retl
		;
		; I686-CMOV-LABEL: negative_CopyFromReg:
		; I686-CMOV: # %bb.0:
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; I686-CMOV-NEXT: addl {{[0-9]+}}(%esp), %eax
		; I686-CMOV-NEXT: cmpb %al, %cl
		; I686-CMOV-NEXT: cmovgel %ecx, %eax
		; I686-CMOV-NEXT: # kill: def $al killed $al killed $eax
		; I686-CMOV-NEXT: retl
;		;
; X86_64-LABEL: negative_CopyFromReg:		; X86_64-LABEL: negative_CopyFromReg:
; X86_64: # %bb.0:		; X86_64: # %bb.0:
; X86_64-NEXT: movl %edi, %eax
; X86_64-NEXT: addl %edx, %esi
; X86_64-NEXT: cmpb %sil, %al
; X86_64-NEXT: jge .LBB3_2
; X86_64-NEXT: # %bb.1:
; X86_64-NEXT: movl %esi, %eax		; X86_64-NEXT: movl %esi, %eax
; X86_64-NEXT: .LBB3_2:		; X86_64-NEXT: addl %edx, %eax
		; X86_64-NEXT: cmpb %al, %dil
		; X86_64-NEXT: cmovgel %edi, %eax
; X86_64-NEXT: # kill: def $al killed $al killed $eax		; X86_64-NEXT: # kill: def $al killed $al killed $eax
; X86_64-NEXT: retq		; X86_64-NEXT: retq
%a2_wide = add i32 %a2_wide_orig, %inc		%a2_wide = add i32 %a2_wide_orig, %inc
%a1 = trunc i32 %a1_wide to i8		%a1 = trunc i32 %a1_wide to i8
%a2 = trunc i32 %a2_wide to i8		%a2 = trunc i32 %a2_wide to i8
%t1 = icmp sgt i8 %a1, %a2		%t1 = icmp sgt i8 %a1, %a2
%t2 = select i1 %t1, i8 %a1, i8 %a2		%t2 = select i1 %t1, i8 %a1, i8 %a2
ret i8 %t2		ret i8 %t2
}		}

; Both values come from regs		; Both values come from regs

define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {		define i8 @negative_CopyFromRegs(i32 %a1_wide, i32 %a2_wide) nounwind {
; I386-LABEL: negative_CopyFromRegs:		; I386-NOCMOV-LABEL: negative_CopyFromRegs:
; I386: # %bb.0:		; I386-NOCMOV: # %bb.0:
; I386-NEXT: movb {{[0-9]+}}(%esp), %cl		; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
; I386-NEXT: movb {{[0-9]+}}(%esp), %al		; I386-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
; I386-NEXT: cmpb %cl, %al		; I386-NOCMOV-NEXT: cmpb %cl, %al
; I386-NEXT: jge .LBB4_2		; I386-NOCMOV-NEXT: jge .LBB4_2
; I386-NEXT: # %bb.1:		; I386-NOCMOV-NEXT: # %bb.1:
; I386-NEXT: movl %ecx, %eax		; I386-NOCMOV-NEXT: movl %ecx, %eax
; I386-NEXT: .LBB4_2:		; I386-NOCMOV-NEXT: .LBB4_2:
; I386-NEXT: retl		; I386-NOCMOV-NEXT: retl
;		;
; I686-LABEL: negative_CopyFromRegs:		; I386-CMOV-LABEL: negative_CopyFromRegs:
; I686: # %bb.0:		; I386-CMOV: # %bb.0:
; I686-NEXT: movb {{[0-9]+}}(%esp), %cl		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; I686-NEXT: movb {{[0-9]+}}(%esp), %al		; I386-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I686-NEXT: cmpb %cl, %al		; I386-CMOV-NEXT: cmpb %al, %cl
; I686-NEXT: jge .LBB4_2		; I386-CMOV-NEXT: cmovgel %ecx, %eax
; I686-NEXT: # %bb.1:		; I386-CMOV-NEXT: # kill: def $al killed $al killed $eax
; I686-NEXT: movl %ecx, %eax		; I386-CMOV-NEXT: retl
; I686-NEXT: .LBB4_2:		;
; I686-NEXT: retl		; I686-NOCMOV-LABEL: negative_CopyFromRegs:
		; I686-NOCMOV: # %bb.0:
		; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %cl
		; I686-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %al
		; I686-NOCMOV-NEXT: cmpb %cl, %al
		; I686-NOCMOV-NEXT: jge .LBB4_2
		; I686-NOCMOV-NEXT: # %bb.1:
		; I686-NOCMOV-NEXT: movl %ecx, %eax
		; I686-NOCMOV-NEXT: .LBB4_2:
		; I686-NOCMOV-NEXT: retl
		;
		; I686-CMOV-LABEL: negative_CopyFromRegs:
		; I686-CMOV: # %bb.0:
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
		; I686-CMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
		; I686-CMOV-NEXT: cmpb %al, %cl
		; I686-CMOV-NEXT: cmovgel %ecx, %eax
		; I686-CMOV-NEXT: # kill: def $al killed $al killed $eax
		; I686-CMOV-NEXT: retl
;		;
; X86_64-LABEL: negative_CopyFromRegs:		; X86_64-LABEL: negative_CopyFromRegs:
; X86_64: # %bb.0:		; X86_64: # %bb.0:
; X86_64-NEXT: movl %edi, %eax
; X86_64-NEXT: cmpb %sil, %al
; X86_64-NEXT: jge .LBB4_2
; X86_64-NEXT: # %bb.1:
; X86_64-NEXT: movl %esi, %eax		; X86_64-NEXT: movl %esi, %eax
; X86_64-NEXT: .LBB4_2:		; X86_64-NEXT: cmpb %al, %dil
		; X86_64-NEXT: cmovgel %edi, %eax
; X86_64-NEXT: # kill: def $al killed $al killed $eax		; X86_64-NEXT: # kill: def $al killed $al killed $eax
; X86_64-NEXT: retq		; X86_64-NEXT: retq
%a1 = trunc i32 %a1_wide to i8		%a1 = trunc i32 %a1_wide to i8
%a2 = trunc i32 %a2_wide to i8		%a2 = trunc i32 %a2_wide to i8
%t1 = icmp sgt i8 %a1, %a2		%t1 = icmp sgt i8 %a1, %a2
%t2 = select i1 %t1, i8 %a1, i8 %a2		%t2 = select i1 %t1, i8 %a1, i8 %a2
ret i8 %t2		ret i8 %t2
}		}

test/CodeGen/X86/cmov-promotion.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+cmov \| FileCheck %s --check-prefix=CMOV		; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+cmov \| FileCheck %s --check-prefix=CMOV
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-cmov \| FileCheck %s --check-prefix=NO_CMOV		; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-cmov \| FileCheck %s --check-prefix=NO_CMOV

define i16 @cmov_zpromotion_8_to_16(i1 %c) {		define i16 @cmov_zpromotion_8_to_16(i1 %c) {
; CMOV-LABEL: cmov_zpromotion_8_to_16:		; CMOV-LABEL: cmov_zpromotion_8_to_16:
; CMOV: # %bb.0:		; CMOV: # %bb.0:
; CMOV-NEXT: testb $1, %dil		; CMOV-NEXT: testb $1, %dil
; CMOV-NEXT: movb $117, %al		; CMOV-NEXT: movl $117, %ecx
; CMOV-NEXT: jne .LBB0_2		; CMOV-NEXT: movl $237, %eax
; CMOV-NEXT: # %bb.1:		; CMOV-NEXT: cmovnel %ecx, %eax
; CMOV-NEXT: movb $-19, %al
; CMOV-NEXT: .LBB0_2:
; CMOV-NEXT: movzbl %al, %eax
; CMOV-NEXT: # kill: def $ax killed $ax killed $eax		; CMOV-NEXT: # kill: def $ax killed $ax killed $eax
; CMOV-NEXT: retq		; CMOV-NEXT: retq
;		;
; NO_CMOV-LABEL: cmov_zpromotion_8_to_16:		; NO_CMOV-LABEL: cmov_zpromotion_8_to_16:
; NO_CMOV: # %bb.0:		; NO_CMOV: # %bb.0:
; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)		; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
; NO_CMOV-NEXT: movb $117, %al		; NO_CMOV-NEXT: movb $117, %al
; NO_CMOV-NEXT: jne .LBB0_2		; NO_CMOV-NEXT: jne .LBB0_2
; NO_CMOV-NEXT: # %bb.1:		; NO_CMOV-NEXT: # %bb.1:
; NO_CMOV-NEXT: movb $-19, %al		; NO_CMOV-NEXT: movb $-19, %al
; NO_CMOV-NEXT: .LBB0_2:		; NO_CMOV-NEXT: .LBB0_2:
; NO_CMOV-NEXT: movzbl %al, %eax		; NO_CMOV-NEXT: movzbl %al, %eax
; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax		; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax
; NO_CMOV-NEXT: retl		; NO_CMOV-NEXT: retl
%t0 = select i1 %c, i8 117, i8 -19		%t0 = select i1 %c, i8 117, i8 -19
%ret = zext i8 %t0 to i16		%ret = zext i8 %t0 to i16
ret i16 %ret		ret i16 %ret
}		}

define i32 @cmov_zpromotion_8_to_32(i1 %c) {		define i32 @cmov_zpromotion_8_to_32(i1 %c) {
; CMOV-LABEL: cmov_zpromotion_8_to_32:		; CMOV-LABEL: cmov_zpromotion_8_to_32:
; CMOV: # %bb.0:		; CMOV: # %bb.0:
; CMOV-NEXT: testb $1, %dil		; CMOV-NEXT: testb $1, %dil
; CMOV-NEXT: movb $126, %al		; CMOV-NEXT: movl $126, %ecx
; CMOV-NEXT: jne .LBB1_2		; CMOV-NEXT: movl $255, %eax
; CMOV-NEXT: # %bb.1:		; CMOV-NEXT: cmovnel %ecx, %eax
; CMOV-NEXT: movb $-1, %al
; CMOV-NEXT: .LBB1_2:
; CMOV-NEXT: movzbl %al, %eax
; CMOV-NEXT: retq		; CMOV-NEXT: retq
;		;
; NO_CMOV-LABEL: cmov_zpromotion_8_to_32:		; NO_CMOV-LABEL: cmov_zpromotion_8_to_32:
; NO_CMOV: # %bb.0:		; NO_CMOV: # %bb.0:
; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)		; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
; NO_CMOV-NEXT: movb $126, %al		; NO_CMOV-NEXT: movb $126, %al
; NO_CMOV-NEXT: jne .LBB1_2		; NO_CMOV-NEXT: jne .LBB1_2
; NO_CMOV-NEXT: # %bb.1:		; NO_CMOV-NEXT: # %bb.1:
; NO_CMOV-NEXT: movb $-1, %al		; NO_CMOV-NEXT: movb $-1, %al
; NO_CMOV-NEXT: .LBB1_2:		; NO_CMOV-NEXT: .LBB1_2:
; NO_CMOV-NEXT: movzbl %al, %eax		; NO_CMOV-NEXT: movzbl %al, %eax
; NO_CMOV-NEXT: retl		; NO_CMOV-NEXT: retl
%t0 = select i1 %c, i8 12414, i8 -1		%t0 = select i1 %c, i8 12414, i8 -1
%ret = zext i8 %t0 to i32		%ret = zext i8 %t0 to i32
ret i32 %ret		ret i32 %ret
}		}

define i64 @cmov_zpromotion_8_to_64(i1 %c) {		define i64 @cmov_zpromotion_8_to_64(i1 %c) {
; CMOV-LABEL: cmov_zpromotion_8_to_64:		; CMOV-LABEL: cmov_zpromotion_8_to_64:
; CMOV: # %bb.0:		; CMOV: # %bb.0:
; CMOV-NEXT: testb $1, %dil		; CMOV-NEXT: testb $1, %dil
; CMOV-NEXT: movb $126, %al		; CMOV-NEXT: movl $126, %ecx
; CMOV-NEXT: jne .LBB2_2		; CMOV-NEXT: movl $255, %eax
; CMOV-NEXT: # %bb.1:		; CMOV-NEXT: cmovnel %ecx, %eax
; CMOV-NEXT: movb $-1, %al
; CMOV-NEXT: .LBB2_2:
; CMOV-NEXT: movzbl %al, %eax
; CMOV-NEXT: retq		; CMOV-NEXT: retq
;		;
; NO_CMOV-LABEL: cmov_zpromotion_8_to_64:		; NO_CMOV-LABEL: cmov_zpromotion_8_to_64:
; NO_CMOV: # %bb.0:		; NO_CMOV: # %bb.0:
; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)		; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
; NO_CMOV-NEXT: movb $126, %al		; NO_CMOV-NEXT: movb $126, %al
; NO_CMOV-NEXT: jne .LBB2_2		; NO_CMOV-NEXT: jne .LBB2_2
; NO_CMOV-NEXT: # %bb.1:		; NO_CMOV-NEXT: # %bb.1:
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines	; NO_CMOV-NEXT: retl
%ret = zext i32 %t0 to i64		%ret = zext i32 %t0 to i64
ret i64 %ret		ret i64 %ret
}		}

define i16 @cmov_spromotion_8_to_16(i1 %c) {		define i16 @cmov_spromotion_8_to_16(i1 %c) {
; CMOV-LABEL: cmov_spromotion_8_to_16:		; CMOV-LABEL: cmov_spromotion_8_to_16:
; CMOV: # %bb.0:		; CMOV: # %bb.0:
; CMOV-NEXT: testb $1, %dil		; CMOV-NEXT: testb $1, %dil
; CMOV-NEXT: movb $117, %al		; CMOV-NEXT: movl $117, %eax
; CMOV-NEXT: jne .LBB6_2		; CMOV-NEXT: movl $237, %ecx
; CMOV-NEXT: # %bb.1:		; CMOV-NEXT: cmovnel %eax, %ecx
; CMOV-NEXT: movb $-19, %al		; CMOV-NEXT: movsbl %cl, %eax
; CMOV-NEXT: .LBB6_2:
; CMOV-NEXT: movsbl %al, %eax
; CMOV-NEXT: # kill: def $ax killed $ax killed $eax		; CMOV-NEXT: # kill: def $ax killed $ax killed $eax
		andreadbUnsubmitted Done Reply Inline Actions I noticed that we could avoid the sign extend if we move -19 instead of 237 to ECX, and we commute the operands of that CMOV (along with the condition: from NE to E). The following sequence should be equivalent: ; CMOV-NEXT: movl $117, %eax ; CMOV-NEXT: movl $-19, %ecx ; CMOV-NEXT: cmovel %ecx, %eax Same for other 'spromotion' tests below. P.S.: none of these things require changes to your patch. This was just FYI (something that I found interesting while looking at the test changes). andreadb: I noticed that we could avoid the sign extend if we move -19 instead of 237 to ECX, and we…
		lebedev.riAuthorUnsubmitted Done Reply Inline Actions Given that the input is define i16 @old(i1 %c) { %t0 = select i1 %c, i8 117, i8 -19 %ret = sext i8 %t0 to i16 ret i16 %ret } why can't we simply widen the hands of select, like define i16 @new(i1 %c) { %ret = select i1 %c, i16 117, i16 -19 ret i16 %ret } https://rise4fun.com/Alive/cs8 ? So instead of testb $1, %dil movl $117, %eax movl $237, %ecx cmovnel %eax, %ecx movsbl %cl, %eax we get testb $1, %dil movl $117, %ecx movl $65517, %eax # imm = 0xFFED cmovnel %ecx, %eax I.e. the 'obvious' fix here is that if we are widening result of CMOV, and both of the possibilities are constants, then just widen those constants and CMOV itself. Why do you think we'd need to flip the CMOV condition? lebedev.ri: Given that the input is ``` define i16 @old(i1 %c) { %t0 = select i1 %c, i8 117, i8 -19…
		lebedev.riAuthorUnsubmitted Done Reply Inline Actions D59035 lebedev.ri: D59035
; CMOV-NEXT: retq		; CMOV-NEXT: retq
;		;
; NO_CMOV-LABEL: cmov_spromotion_8_to_16:		; NO_CMOV-LABEL: cmov_spromotion_8_to_16:
; NO_CMOV: # %bb.0:		; NO_CMOV: # %bb.0:
; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)		; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
; NO_CMOV-NEXT: movb $117, %al		; NO_CMOV-NEXT: movb $117, %al
; NO_CMOV-NEXT: jne .LBB6_2		; NO_CMOV-NEXT: jne .LBB6_2
; NO_CMOV-NEXT: # %bb.1:		; NO_CMOV-NEXT: # %bb.1:
; NO_CMOV-NEXT: movb $-19, %al		; NO_CMOV-NEXT: movb $-19, %al
; NO_CMOV-NEXT: .LBB6_2:		; NO_CMOV-NEXT: .LBB6_2:
; NO_CMOV-NEXT: movsbl %al, %eax		; NO_CMOV-NEXT: movsbl %al, %eax
; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax		; NO_CMOV-NEXT: # kill: def $ax killed $ax killed $eax
; NO_CMOV-NEXT: retl		; NO_CMOV-NEXT: retl
%t0 = select i1 %c, i8 117, i8 -19		%t0 = select i1 %c, i8 117, i8 -19
%ret = sext i8 %t0 to i16		%ret = sext i8 %t0 to i16
ret i16 %ret		ret i16 %ret
}		}

define i32 @cmov_spromotion_8_to_32(i1 %c) {		define i32 @cmov_spromotion_8_to_32(i1 %c) {
; CMOV-LABEL: cmov_spromotion_8_to_32:		; CMOV-LABEL: cmov_spromotion_8_to_32:
; CMOV: # %bb.0:		; CMOV: # %bb.0:
; CMOV-NEXT: testb $1, %dil		; CMOV-NEXT: testb $1, %dil
; CMOV-NEXT: movb $126, %al		; CMOV-NEXT: movl $126, %eax
; CMOV-NEXT: jne .LBB7_2		; CMOV-NEXT: movl $255, %ecx
; CMOV-NEXT: # %bb.1:		; CMOV-NEXT: cmovnel %eax, %ecx
; CMOV-NEXT: movb $-1, %al		; CMOV-NEXT: movsbl %cl, %eax
; CMOV-NEXT: .LBB7_2:
; CMOV-NEXT: movsbl %al, %eax
; CMOV-NEXT: retq		; CMOV-NEXT: retq
;		;
; NO_CMOV-LABEL: cmov_spromotion_8_to_32:		; NO_CMOV-LABEL: cmov_spromotion_8_to_32:
; NO_CMOV: # %bb.0:		; NO_CMOV: # %bb.0:
; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)		; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
; NO_CMOV-NEXT: movb $126, %al		; NO_CMOV-NEXT: movb $126, %al
; NO_CMOV-NEXT: jne .LBB7_2		; NO_CMOV-NEXT: jne .LBB7_2
; NO_CMOV-NEXT: # %bb.1:		; NO_CMOV-NEXT: # %bb.1:
; NO_CMOV-NEXT: movb $-1, %al		; NO_CMOV-NEXT: movb $-1, %al
; NO_CMOV-NEXT: .LBB7_2:		; NO_CMOV-NEXT: .LBB7_2:
; NO_CMOV-NEXT: movsbl %al, %eax		; NO_CMOV-NEXT: movsbl %al, %eax
; NO_CMOV-NEXT: retl		; NO_CMOV-NEXT: retl
%t0 = select i1 %c, i8 12414, i8 -1		%t0 = select i1 %c, i8 12414, i8 -1
%ret = sext i8 %t0 to i32		%ret = sext i8 %t0 to i32
ret i32 %ret		ret i32 %ret
}		}

define i64 @cmov_spromotion_8_to_64(i1 %c) {		define i64 @cmov_spromotion_8_to_64(i1 %c) {
; CMOV-LABEL: cmov_spromotion_8_to_64:		; CMOV-LABEL: cmov_spromotion_8_to_64:
; CMOV: # %bb.0:		; CMOV: # %bb.0:
; CMOV-NEXT: testb $1, %dil		; CMOV-NEXT: testb $1, %dil
; CMOV-NEXT: movb $126, %al		; CMOV-NEXT: movl $126, %eax
; CMOV-NEXT: jne .LBB8_2		; CMOV-NEXT: movl $255, %ecx
; CMOV-NEXT: # %bb.1:		; CMOV-NEXT: cmovnel %eax, %ecx
; CMOV-NEXT: movb $-1, %al		; CMOV-NEXT: movsbq %cl, %rax
; CMOV-NEXT: .LBB8_2:
; CMOV-NEXT: movsbq %al, %rax
; CMOV-NEXT: retq		; CMOV-NEXT: retq
;		;
; NO_CMOV-LABEL: cmov_spromotion_8_to_64:		; NO_CMOV-LABEL: cmov_spromotion_8_to_64:
; NO_CMOV: # %bb.0:		; NO_CMOV: # %bb.0:
; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)		; NO_CMOV-NEXT: testb $1, {{[0-9]+}}(%esp)
; NO_CMOV-NEXT: movb $126, %al		; NO_CMOV-NEXT: movb $126, %al
; NO_CMOV-NEXT: jne .LBB8_2		; NO_CMOV-NEXT: jne .LBB8_2
; NO_CMOV-NEXT: # %bb.1:		; NO_CMOV-NEXT: # %bb.1:
▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

test/CodeGen/X86/cmov.ll

	Show First 20 Lines • Show All 85 Lines • ▼ Show 20 Lines
	; CHECK-NEXT: sarl %cl, %edx			; CHECK-NEXT: sarl %cl, %edx
	; CHECK-NEXT: movb {{.*}}(%rip), %al			; CHECK-NEXT: movb {{.*}}(%rip), %al
	; CHECK-NEXT: testb %al, %al			; CHECK-NEXT: testb %al, %al
	; CHECK-NEXT: je .LBB3_2			; CHECK-NEXT: je .LBB3_2
	; CHECK-NEXT: # %bb.1: # %bb.i.i.i			; CHECK-NEXT: # %bb.1: # %bb.i.i.i
	; CHECK-NEXT: movb {{.*}}(%rip), %cl			; CHECK-NEXT: movb {{.*}}(%rip), %cl
	; CHECK-NEXT: .LBB3_2: # %func_4.exit.i			; CHECK-NEXT: .LBB3_2: # %func_4.exit.i
	; CHECK-NEXT: pushq %rbx			; CHECK-NEXT: pushq %rbx
				; CHECK-NEXT: xorl %esi, %esi
	; CHECK-NEXT: testb %dl, %dl			; CHECK-NEXT: testb %dl, %dl
	; CHECK-NEXT: setne %bl			; CHECK-NEXT: setne %bl
	; CHECK-NEXT: movl %eax, %ecx			; CHECK-NEXT: movzbl %al, %ecx
	; CHECK-NEXT: je .LBB3_4			; CHECK-NEXT: cmovnel %esi, %ecx
	; CHECK-NEXT: # %bb.3: # %func_4.exit.i
	; CHECK-NEXT: xorl %ecx, %ecx
	; CHECK-NEXT: .LBB3_4: # %func_4.exit.i
	; CHECK-NEXT: testb %al, %al			; CHECK-NEXT: testb %al, %al
	; CHECK-NEXT: je .LBB3_7			; CHECK-NEXT: je .LBB3_5
	; CHECK-NEXT: # %bb.5: # %func_4.exit.i			; CHECK-NEXT: # %bb.3: # %func_4.exit.i
	; CHECK-NEXT: testb %bl, %bl			; CHECK-NEXT: testb %bl, %bl
	; CHECK-NEXT: jne .LBB3_7			; CHECK-NEXT: jne .LBB3_5
	; CHECK-NEXT: # %bb.6: # %bb.i.i			; CHECK-NEXT: # %bb.4: # %bb.i.i
	; CHECK-NEXT: movb {{.*}}(%rip), %cl			; CHECK-NEXT: movb {{.*}}(%rip), %cl
	; CHECK-NEXT: xorl %ebx, %ebx			; CHECK-NEXT: xorl %ebx, %ebx
	; CHECK-NEXT: movl %eax, %ecx			; CHECK-NEXT: movl %eax, %ecx
	; CHECK-NEXT: .LBB3_7: # %func_1.exit			; CHECK-NEXT: .LBB3_5: # %func_1.exit
	; CHECK-NEXT: movb %cl, {{.*}}(%rip)			; CHECK-NEXT: movb %cl, {{.*}}(%rip)
	; CHECK-NEXT: movzbl %cl, %esi			; CHECK-NEXT: movzbl %cl, %esi
	; CHECK-NEXT: movl $_2E_str, %edi			; CHECK-NEXT: movl $_2E_str, %edi
	; CHECK-NEXT: xorl %eax, %eax			; CHECK-NEXT: xorl %eax, %eax
	; CHECK-NEXT: callq printf			; CHECK-NEXT: callq printf
	; CHECK-NEXT: movl %ebx, %eax			; CHECK-NEXT: movl %ebx, %eax
	; CHECK-NEXT: popq %rbx			; CHECK-NEXT: popq %rbx
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	▲ Show 20 Lines • Show All 69 Lines • ▼ Show 20 Lines
	}			}


	; Don't try to use a 16-bit conditional move to do an 8-bit select,			; Don't try to use a 16-bit conditional move to do an 8-bit select,
	; because it isn't worth it. Just use a branch instead.			; because it isn't worth it. Just use a branch instead.
	define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {			define i8 @test7(i1 inreg %c, i8 inreg %a, i8 inreg %b) nounwind {
	; CHECK-LABEL: test7:			; CHECK-LABEL: test7:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: testb $1, %dil
	; CHECK-NEXT: jne .LBB6_1
	; CHECK-NEXT: # %bb.2:
	; CHECK-NEXT: movl %edx, %eax
	; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: retq
	; CHECK-NEXT: .LBB6_1:
	; CHECK-NEXT: movl %esi, %eax			; CHECK-NEXT: movl %esi, %eax
				; CHECK-NEXT: testb $1, %dil
				; CHECK-NEXT: cmovel %edx, %eax
	; CHECK-NEXT: # kill: def $al killed $al killed $eax			; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%d = select i1 %c, i8 %a, i8 %b			%d = select i1 %c, i8 %a, i8 %b
	ret i8 %d			ret i8 %d
	}			}

	define i32 @smin(i32 %x) {			define i32 @smin(i32 %x) {
	; CHECK-LABEL: smin:			; CHECK-LABEL: smin:
	Show All 12 Lines

test/CodeGen/X86/cmovcmov.ll

	Show First 20 Lines • Show All 319 Lines • ▼ Show 20 Lines
	; %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)			; %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
	; %13 = COPY %12			; %13 = COPY %12
	; Which was invalid as %12 is not the same value as %13			; Which was invalid as %12 is not the same value as %13

	define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {			define void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) {
	; CMOV-LABEL: no_cascade_opt:			; CMOV-LABEL: no_cascade_opt:
	; CMOV: # %bb.0: # %entry			; CMOV: # %bb.0: # %entry
	; CMOV-NEXT: cmpl %edx, %esi			; CMOV-NEXT: cmpl %edx, %esi
	; CMOV-NEXT: movb $20, %al			; CMOV-NEXT: movl $20, %eax
	; CMOV-NEXT: movb $20, %dl			; CMOV-NEXT: cmovll %eax, %ecx
	; CMOV-NEXT: jge .LBB7_1			; CMOV-NEXT: cmovlel %ecx, %eax
	; CMOV-NEXT: # %bb.2: # %entry
	; CMOV-NEXT: jle .LBB7_3
	; CMOV-NEXT: .LBB7_4: # %entry
	; CMOV-NEXT: testl %edi, %edi
	; CMOV-NEXT: jne .LBB7_5
	; CMOV-NEXT: .LBB7_6: # %entry
	; CMOV-NEXT: movb %al, {{.*}}(%rip)
	; CMOV-NEXT: retq
	; CMOV-NEXT: .LBB7_1: # %entry
	; CMOV-NEXT: movl %ecx, %edx
	; CMOV-NEXT: jg .LBB7_4
	; CMOV-NEXT: .LBB7_3: # %entry
	; CMOV-NEXT: movl %edx, %eax
	; CMOV-NEXT: testl %edi, %edi			; CMOV-NEXT: testl %edi, %edi
	; CMOV-NEXT: je .LBB7_6			; CMOV-NEXT: cmovnel %ecx, %eax
	; CMOV-NEXT: .LBB7_5: # %entry
	; CMOV-NEXT: movl %edx, %eax
	; CMOV-NEXT: movb %al, {{.*}}(%rip)			; CMOV-NEXT: movb %al, {{.*}}(%rip)
	; CMOV-NEXT: retq			; CMOV-NEXT: retq
	;			;
	; NOCMOV-LABEL: no_cascade_opt:			; NOCMOV-LABEL: no_cascade_opt:
	; NOCMOV: # %bb.0: # %entry			; NOCMOV: # %bb.0: # %entry
	; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax			; NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
	; NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %eax			; NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %eax
	; NOCMOV-NEXT: movb $20, %al			; NOCMOV-NEXT: movb $20, %al
	Show All 32 Lines

test/CodeGen/X86/copy-eflags.ll

	Show First 20 Lines • Show All 241 Lines • ▼ Show 20 Lines
	; X32-NEXT: jne .LBB3_5			; X32-NEXT: jne .LBB3_5
	; X32-NEXT: # %bb.4: # %bb1			; X32-NEXT: # %bb.4: # %bb1
	; X32-NEXT: # in Loop: Header=BB3_1 Depth=1			; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
	; X32-NEXT: movl %edx, %edi			; X32-NEXT: movl %edx, %edi
	; X32-NEXT: jmp .LBB3_5			; X32-NEXT: jmp .LBB3_5
	;			;
	; X64-LABEL: PR37100:			; X64-LABEL: PR37100:
	; X64: # %bb.0: # %bb			; X64: # %bb.0: # %bb
	; X64-NEXT: movq %rdx, %r11			; X64-NEXT: movq %rdx, %rsi
	; X64-NEXT: movl {{[0-9]+}}(%rsp), %r10d			; X64-NEXT: movl {{[0-9]+}}(%rsp), %r10d
	; X64-NEXT: jmp .LBB3_1			; X64-NEXT: movzbl %cl, %r11d
	; X64-NEXT: .p2align 4, 0x90			; X64-NEXT: .p2align 4, 0x90
	; X64-NEXT: .LBB3_5: # %bb1
	; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
	; X64-NEXT: movl %r10d, %eax
	; X64-NEXT: cltd
	; X64-NEXT: idivl %esi
	; X64-NEXT: .LBB3_1: # %bb1			; X64-NEXT: .LBB3_1: # %bb1
	; X64-NEXT: # =>This Inner Loop Header: Depth=1			; X64-NEXT: # =>This Inner Loop Header: Depth=1
	; X64-NEXT: movsbq %dil, %rax			; X64-NEXT: movsbq %dil, %rax
	; X64-NEXT: xorl %esi, %esi			; X64-NEXT: xorl %ecx, %ecx
	; X64-NEXT: cmpq %rax, %r11			; X64-NEXT: cmpq %rax, %rsi
	; X64-NEXT: setl %sil			; X64-NEXT: setl %cl
	; X64-NEXT: negl %esi			; X64-NEXT: negl %ecx
	; X64-NEXT: cmpq %rax, %r11			; X64-NEXT: cmpq %rax, %rsi
	; X64-NEXT: jl .LBB3_3			; X64-NEXT: movzbl %al, %edi
	; X64-NEXT: # %bb.2: # %bb1			; X64-NEXT: cmovgel %r11d, %edi
	; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
	; X64-NEXT: movl %ecx, %edi
	; X64-NEXT: .LBB3_3: # %bb1
	; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
	; X64-NEXT: movb %dil, (%r8)			; X64-NEXT: movb %dil, (%r8)
	; X64-NEXT: jl .LBB3_5			; X64-NEXT: cmovgel (%r9), %ecx
	; X64-NEXT: # %bb.4: # %bb1			; X64-NEXT: movl %r10d, %eax
	; X64-NEXT: # in Loop: Header=BB3_1 Depth=1			; X64-NEXT: cltd
	; X64-NEXT: movl (%r9), %esi			; X64-NEXT: idivl %ecx
	; X64-NEXT: jmp .LBB3_5			; X64-NEXT: jmp .LBB3_1
				andreadb Unsubmitted Not Done Reply Inline Actions Nice change. It is a shame that we have to repeat the same CMPQ because of the NEGL which modifies FLAGS. In theory, we could reorder that sequence and avoid repeating the same compare. Again, this may not be that important and it has nothing to do with your patch. andreadb: Nice change. It is a shame that we have to repeat the same CMPQ because of the NEGL which…
	bb:			bb:
	br label %bb1			br label %bb1

	bb1:			bb1:
	%tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]			%tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
	%tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]			%tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
	%tmp3 = icmp sgt i16 %tmp2, 7			%tmp3 = icmp sgt i16 %tmp2, 7
	%tmp4 = select i1 %tmp3, i16 %tmp2, i16 7			%tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
	▲ Show 20 Lines • Show All 69 Lines • Show Last 20 Lines

test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc < %s -mtriple=x86_64-apple-darwin10 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE			; RUN: llc < %s -mtriple=x86_64-apple-darwin10 \| FileCheck %s --check-prefix=CHECK --check-prefixes=ISEL,SSE,SSE-ISEL
	; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE			; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 \| FileCheck %s --check-prefix=CHECK --check-prefixes=FASTISEL,SSE,SSE-FASTISEL
	; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX			; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -mcpu=corei7-avx \| FileCheck %s --check-prefix=CHECK --check-prefixes=ISEL,AVX,AVX-ISEL
	; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 -mcpu=corei7-avx \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX			; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -fast-isel -fast-isel-abort=1 -mcpu=corei7-avx \| FileCheck %s --check-prefix=CHECK --check-prefixes=FASTISEL,AVX,AVX-FASTISEL


	define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) {			define float @select_fcmp_one_f32(float %a, float %b, float %c, float %d) {
	; SSE-LABEL: select_fcmp_one_f32:			; SSE-LABEL: select_fcmp_one_f32:
	; SSE: ## %bb.0:			; SSE: ## %bb.0:
	; SSE-NEXT: ucomiss %xmm1, %xmm0			; SSE-NEXT: ucomiss %xmm1, %xmm0
	; SSE-NEXT: jne LBB0_2			; SSE-NEXT: jne LBB0_2
	; SSE-NEXT: ## %bb.1:			; SSE-NEXT: ## %bb.1:
	▲ Show 20 Lines • Show All 259 Lines • ▼ Show 20 Lines
	; AVX-NEXT: LBB11_2:			; AVX-NEXT: LBB11_2:
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%1 = icmp sle i64 %a, %b			%1 = icmp sle i64 %a, %b
	%2 = select i1 %1, float %c, float %d			%2 = select i1 %1, float %c, float %d
	ret float %2			ret float %2
	}			}

	define i8 @select_icmp_sle_i8(i64 %a, i64 %b, i8 %c, i8 %d) {			define i8 @select_icmp_sle_i8(i64 %a, i64 %b, i8 %c, i8 %d) {
	; CHECK-LABEL: select_icmp_sle_i8:			; ISEL-LABEL: select_icmp_sle_i8:
	; CHECK: ## %bb.0:			; ISEL: ## %bb.0:
	; CHECK-NEXT: cmpq %rsi, %rdi			; ISEL-NEXT: movl %edx, %eax
	; CHECK-NEXT: jle LBB12_1			; ISEL-NEXT: cmpq %rsi, %rdi
	; CHECK-NEXT: ## %bb.2:			; ISEL-NEXT: cmovgl %ecx, %eax
	; CHECK-NEXT: movl %ecx, %eax			; ISEL-NEXT: ## kill: def $al killed $al killed $eax
	; CHECK-NEXT: ## kill: def $al killed $al killed $eax			; ISEL-NEXT: retq
	; CHECK-NEXT: retq			;
	; CHECK-NEXT: LBB12_1:			; FASTISEL-LABEL: select_icmp_sle_i8:
	; CHECK-NEXT: movl %edx, %eax			; FASTISEL: ## %bb.0:
	; CHECK-NEXT: ## kill: def $al killed $al killed $eax			; FASTISEL-NEXT: cmpq %rsi, %rdi
	; CHECK-NEXT: retq			; FASTISEL-NEXT: jle LBB12_1
				; FASTISEL-NEXT: ## %bb.2:
				; FASTISEL-NEXT: movl %ecx, %eax
				; FASTISEL-NEXT: ## kill: def $al killed $al killed $eax
				; FASTISEL-NEXT: retq
				; FASTISEL-NEXT: LBB12_1:
				; FASTISEL-NEXT: movl %edx, %eax
				; FASTISEL-NEXT: ## kill: def $al killed $al killed $eax
				; FASTISEL-NEXT: retq
	%1 = icmp sle i64 %a, %b			%1 = icmp sle i64 %a, %b
	%2 = select i1 %1, i8 %c, i8 %d			%2 = select i1 %1, i8 %c, i8 %d
	ret i8 %2			ret i8 %2
	}			}

test/CodeGen/X86/fshl.ll

	Show All 30 Lines
	; X86-NEXT: # %bb.1:			; X86-NEXT: # %bb.1:
	; X86-NEXT: orb %ah, %ch			; X86-NEXT: orb %ah, %ch
	; X86-NEXT: movb %ch, %al			; X86-NEXT: movb %ch, %al
	; X86-NEXT: .LBB0_2:			; X86-NEXT: .LBB0_2:
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: var_shift_i8:			; X64-LABEL: var_shift_i8:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: andb $7, %dl			; X64-NEXT: andb $7, %dl
				; X64-NEXT: movl %edi, %eax
	; X64-NEXT: movl %edx, %ecx			; X64-NEXT: movl %edx, %ecx
	; X64-NEXT: shlb %cl, %dil			; X64-NEXT: shlb %cl, %al
	; X64-NEXT: movb $8, %cl			; X64-NEXT: movb $8, %cl
	; X64-NEXT: subb %dl, %cl			; X64-NEXT: subb %dl, %cl
	; X64-NEXT: shrb %cl, %sil			; X64-NEXT: shrb %cl, %sil
				; X64-NEXT: orb %al, %sil
				; X64-NEXT: movzbl %sil, %eax
	; X64-NEXT: testb %dl, %dl			; X64-NEXT: testb %dl, %dl
	; X64-NEXT: je .LBB0_2			; X64-NEXT: cmovel %edi, %eax
	; X64-NEXT: # %bb.1:
	; X64-NEXT: orb %sil, %dil
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: .LBB0_2:
	; X64-NEXT: # kill: def $al killed $al killed $eax			; X64-NEXT: # kill: def $al killed $al killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)			%tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)
	ret i8 %tmp			ret i8 %tmp
	}			}

	define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {			define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
	; X86-FAST-LABEL: var_shift_i16:			; X86-FAST-LABEL: var_shift_i16:
	▲ Show 20 Lines • Show All 448 Lines • Show Last 20 Lines

test/CodeGen/X86/fshr.ll

	Show All 30 Lines
	; X86-NEXT: # %bb.1:			; X86-NEXT: # %bb.1:
	; X86-NEXT: orb %ch, %ah			; X86-NEXT: orb %ch, %ah
	; X86-NEXT: movb %ah, %al			; X86-NEXT: movb %ah, %al
	; X86-NEXT: .LBB0_2:			; X86-NEXT: .LBB0_2:
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: var_shift_i8:			; X64-LABEL: var_shift_i8:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %esi, %eax
	; X64-NEXT: andb $7, %dl			; X64-NEXT: andb $7, %dl
				; X64-NEXT: movl %esi, %eax
	; X64-NEXT: movl %edx, %ecx			; X64-NEXT: movl %edx, %ecx
	; X64-NEXT: shrb %cl, %sil			; X64-NEXT: shrb %cl, %al
	; X64-NEXT: movb $8, %cl			; X64-NEXT: movb $8, %cl
	; X64-NEXT: subb %dl, %cl			; X64-NEXT: subb %dl, %cl
	; X64-NEXT: shlb %cl, %dil			; X64-NEXT: shlb %cl, %dil
				; X64-NEXT: orb %al, %dil
				; X64-NEXT: movzbl %dil, %eax
	; X64-NEXT: testb %dl, %dl			; X64-NEXT: testb %dl, %dl
	; X64-NEXT: je .LBB0_2			; X64-NEXT: cmovel %esi, %eax
	; X64-NEXT: # %bb.1:
	; X64-NEXT: orb %sil, %dil
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: .LBB0_2:
	; X64-NEXT: # kill: def $al killed $al killed $eax			; X64-NEXT: # kill: def $al killed $al killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = tail call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 %z)			%tmp = tail call i8 @llvm.fshr.i8(i8 %x, i8 %y, i8 %z)
	ret i8 %tmp			ret i8 %tmp
	}			}

	define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {			define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
	; X86-FAST-LABEL: var_shift_i16:			; X86-FAST-LABEL: var_shift_i16:
	▲ Show 20 Lines • Show All 444 Lines • Show Last 20 Lines

test/CodeGen/X86/i386-shrink-wrapping.ll

	Show All 13 Lines

	; Check that we are clobbering the flags when they are live-in of the			; Check that we are clobbering the flags when they are live-in of the
	; prologue block and the prologue needs to adjust the stack.			; prologue block and the prologue needs to adjust the stack.
	; PR25607.			; PR25607.
	;			;
	; CHECK-LABEL: eflagsLiveInPrologue:			; CHECK-LABEL: eflagsLiveInPrologue:
	;			;
	; DISABLE: pushl			; DISABLE: pushl
	; DISABLE-NEXT: pushl			; DISABLE-NEXT: subl $8, %esp
	; DISABLE-NEXT: subl $20, %esp
	;			;
	; CHECK: movl L_a$non_lazy_ptr, [[A:%[a-z]+]]			; CHECK: movl L_a$non_lazy_ptr, [[A:%[a-z]+]]
	; CHECK-NEXT: cmpl $0, ([[A]])			; CHECK-NEXT: cmpl $0, ([[A]])
	; CHECK-NEXT: je [[PREHEADER_LABEL:LBB[0-9_]+]]			; CHECK-NEXT: je [[PREHEADER_LABEL:LBB[0-9_]+]]
	;			;
	; CHECK: movb $1, _d			; CHECK: movb $1, _d
	;			;
	; CHECK: [[PREHEADER_LABEL]]:			; CHECK: [[PREHEADER_LABEL]]:
	; CHECK-NEXT: movl L_b$non_lazy_ptr, [[B:%[a-z]+]]			; CHECK-NEXT: movl L_b$non_lazy_ptr, [[B:%[a-z]+]]
	; CHECK-NEXT: movl ([[B]]), [[TMP1:%[a-z]+]]			; CHECK-NEXT: movl ([[B]]), [[TMP1:%[a-z]+]]
	; CHECK-NEXT: testl [[TMP1]], [[TMP1]]			; CHECK-NEXT: testl [[TMP1]], [[TMP1]]
	; CHECK-NEXT: je [[FOREND_LABEL:LBB[0-9_]+]]			; CHECK-NEXT: je [[FOREND_LABEL:LBB[0-9_]+]]
	;			;
	; Skip the loop.			; Skip the loop.
	; [...]			; [...]
	;			;
	; The for.end block is split to accommodate the different selects.			; The for.end block is split to accommodate the different selects.
	; We are interested in the one with the call, so skip until the branch.			; We are interested in the one with the call, so skip until the branch.
	; CHECK: [[FOREND_LABEL]]:			; CHECK: [[FOREND_LABEL]]:
	; CHECK-NEXT: xorl
				; ENABLE: pushl
				; ENABLE-NEXT: subl $8, %esp

				; CHECK: xorl [[CMOVE_VAL:%edx]], [[CMOVE_VAL]]
	; CHECK-NEXT: cmpb $0, _d			; CHECK-NEXT: cmpb $0, _d
	; CHECK-NEXT: movl $0, %edx			; CHECK-NEXT: movl $6, [[IMM_VAL:%ecx]]
	; CHECK-NEXT: jne [[CALL_LABEL:LBB[0-9_]+]]
	;
	; CHECK: movb $6, %dl
	;
	; CHECK: [[CALL_LABEL]]
	;
	; ENABLE-NEXT: pushl
	; ENABLE-NEXT: pushl
	; We must not use sub here otherwise we will clobber the eflags.
	; ENABLE-NEXT: leal -20(%esp), %esp
	;
	; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]]
	; CHECK-NEXT: movb %dl, ([[E]])
	; CHECK-NEXT: movzbl %dl, [[CONV:%[a-z]+]]
	; CHECK-NEXT: movl $6, [[CONV:%[a-z]+]]
	; The eflags is used in the next instruction.			; The eflags is used in the next instruction.
	; If that instruction disappears, we are not exercising the bug			; If that instruction disappears, we are not exercising the bug
	; anymore.			; anymore.
	; CHECK-NEXT: cmovnel {{%[a-z]+}}, [[CONV]]			; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]]
	;
	; Skip all the crust of vaarg lowering.			; CHECK-NEXT: L_e$non_lazy_ptr, [[E:%[a-z]+]]
				; CHECK-NEXT: movb %cl, ([[E]])
				; CHECK-NEXT: leal 1(%ecx), %esi

	; CHECK: calll _varfunc			; CHECK: calll _varfunc
	; Set the return value to 0.			; Set the return value to 0.
	; CHECK-NEXT: xorl %eax, %eax			; CHECK-NEXT: xorl %eax, %eax
	; CHECK-NEXT: addl $20, %esp			; CHECK-NEXT: addl $8, %esp
	; CHECK-NEXT: popl
	; CHECK-NEXT: popl			; CHECK-NEXT: popl
	; CHECK-NEXT: retl			; CHECK-NEXT: retl
	define i32 @eflagsLiveInPrologue() #0 {			define i32 @eflagsLiveInPrologue() #0 {
	entry:			entry:
	%tmp = load i32, i32* @a, align 4			%tmp = load i32, i32* @a, align 4
	%tobool = icmp eq i32 %tmp, 0			%tobool = icmp eq i32 %tmp, 0
	br i1 %tobool, label %for.cond.preheader, label %if.then			br i1 %tobool, label %for.cond.preheader, label %if.then

	Show All 34 Lines

test/CodeGen/X86/midpoint-int.ll

Show First 20 Lines • Show All 1,013 Lines • ▼ Show 20 Lines
; 8-bit width		; 8-bit width
; ---------------------------------------------------------------------------- ;		; ---------------------------------------------------------------------------- ;

; Values come from regs		; Values come from regs

define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {		define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_reg_reg:		; X64-LABEL: scalar_i8_signed_reg_reg:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: cmpb %sil, %dil		; X64-NEXT: movl %esi, %eax
		; X64-NEXT: cmpb %al, %dil
; X64-NEXT: setle %cl		; X64-NEXT: setle %cl
; X64-NEXT: movl %esi, %edx
; X64-NEXT: jg .LBB15_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl %edi, %edx		; X64-NEXT: movl %edi, %edx
; X64-NEXT: .LBB15_2:		; X64-NEXT: cmovgl %esi, %edx
; X64-NEXT: movl %edi, %eax		; X64-NEXT: cmovgel %edi, %eax
; X64-NEXT: jge .LBB15_4
; X64-NEXT: # %bb.3:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: .LBB15_4:
; X64-NEXT: subb %dl, %al
; X64-NEXT: addb %cl, %cl		; X64-NEXT: addb %cl, %cl
; X64-NEXT: decb %cl		; X64-NEXT: decb %cl
		; X64-NEXT: subb %dl, %al
; X64-NEXT: shrb %al		; X64-NEXT: shrb %al
		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl		; X64-NEXT: mulb %cl
; X64-NEXT: addb %dil, %al		; X64-NEXT: addb %dil, %al
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X32-LABEL: scalar_i8_signed_reg_reg:		; X32-LABEL: scalar_i8_signed_reg_reg:
; X32: # %bb.0:		; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %ah		; X32-NEXT: movb {{[0-9]+}}(%esp), %ah
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl		; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
Show All 28 Lines
}		}

define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {		define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_unsigned_reg_reg:		; X64-LABEL: scalar_i8_unsigned_reg_reg:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax		; X64-NEXT: movl %esi, %eax
; X64-NEXT: cmpb %al, %dil		; X64-NEXT: cmpb %al, %dil
; X64-NEXT: setbe %cl		; X64-NEXT: setbe %cl
; X64-NEXT: ja .LBB16_1
; X64-NEXT: # %bb.2:
; X64-NEXT: movl %edi, %edx		; X64-NEXT: movl %edi, %edx
; X64-NEXT: jmp .LBB16_3		; X64-NEXT: cmoval %esi, %edx
; X64-NEXT: .LBB16_1:		; X64-NEXT: cmoval %edi, %eax
; X64-NEXT: movl %eax, %edx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: .LBB16_3:
; X64-NEXT: subb %dl, %al
; X64-NEXT: addb %cl, %cl		; X64-NEXT: addb %cl, %cl
; X64-NEXT: decb %cl		; X64-NEXT: decb %cl
		; X64-NEXT: subb %dl, %al
; X64-NEXT: shrb %al		; X64-NEXT: shrb %al
; X64-NEXT: # kill: def $al killed $al killed $eax		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl		; X64-NEXT: mulb %cl
; X64-NEXT: addb %dil, %al		; X64-NEXT: addb %dil, %al
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X32-LABEL: scalar_i8_unsigned_reg_reg:		; X32-LABEL: scalar_i8_unsigned_reg_reg:
; X32: # %bb.0:		; X32: # %bb.0:
Show All 27 Lines	; X32-NEXT: retl
ret i8 %a10		ret i8 %a10
}		}

; Values are loaded. Only check signed case.		; Values are loaded. Only check signed case.

define i8 @scalar_i8_signed_mem_reg(i8* %a1_addr, i8 %a2) nounwind {		define i8 @scalar_i8_signed_mem_reg(i8* %a1_addr, i8 %a2) nounwind {
; X64-LABEL: scalar_i8_signed_mem_reg:		; X64-LABEL: scalar_i8_signed_mem_reg:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movb (%rdi), %cl		; X64-NEXT: movzbl (%rdi), %ecx
; X64-NEXT: cmpb %sil, %cl		; X64-NEXT: cmpb %sil, %cl
; X64-NEXT: setle %dl		; X64-NEXT: setle %dl
; X64-NEXT: movl %esi, %edi
; X64-NEXT: jg .LBB17_2
; X64-NEXT: # %bb.1:
; X64-NEXT: movl %ecx, %edi		; X64-NEXT: movl %ecx, %edi
; X64-NEXT: .LBB17_2:		; X64-NEXT: cmovgl %esi, %edi
; X64-NEXT: movl %ecx, %eax		; X64-NEXT: movl %ecx, %eax
; X64-NEXT: jge .LBB17_4		; X64-NEXT: cmovll %esi, %eax
; X64-NEXT: # %bb.3:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: .LBB17_4:
; X64-NEXT: subb %dil, %al
; X64-NEXT: addb %dl, %dl		; X64-NEXT: addb %dl, %dl
; X64-NEXT: decb %dl		; X64-NEXT: decb %dl
		; X64-NEXT: subb %dil, %al
; X64-NEXT: shrb %al		; X64-NEXT: shrb %al
		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %dl		; X64-NEXT: mulb %dl
; X64-NEXT: addb %cl, %al		; X64-NEXT: addb %cl, %al
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X32-LABEL: scalar_i8_signed_mem_reg:		; X32-LABEL: scalar_i8_signed_mem_reg:
; X32: # %bb.0:		; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %ah		; X32-NEXT: movb {{[0-9]+}}(%esp), %ah
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
Show All 27 Lines	; X32-NEXT: retl
%t9 = mul nsw i8 %t8, %t4 ; signed		%t9 = mul nsw i8 %t8, %t4 ; signed
%a10 = add nsw i8 %t9, %a1 ; signed		%a10 = add nsw i8 %t9, %a1 ; signed
ret i8 %a10		ret i8 %a10
}		}

define i8 @scalar_i8_signed_reg_mem(i8 %a1, i8* %a2_addr) nounwind {		define i8 @scalar_i8_signed_reg_mem(i8 %a1, i8* %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_reg_mem:		; X64-LABEL: scalar_i8_signed_reg_mem:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movb (%rsi), %dl		; X64-NEXT: movzbl (%rsi), %eax
; X64-NEXT: cmpb %dl, %dil		; X64-NEXT: cmpb %al, %dil
; X64-NEXT: setle %cl		; X64-NEXT: setle %cl
; X64-NEXT: movl %edx, %esi		; X64-NEXT: movl %edi, %edx
; X64-NEXT: jg .LBB18_2		; X64-NEXT: cmovgl %eax, %edx
; X64-NEXT: # %bb.1:		; X64-NEXT: cmovgel %edi, %eax
; X64-NEXT: movl %edi, %esi
; X64-NEXT: .LBB18_2:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: jge .LBB18_4
; X64-NEXT: # %bb.3:
; X64-NEXT: movl %edx, %eax
; X64-NEXT: .LBB18_4:
; X64-NEXT: subb %sil, %al
; X64-NEXT: addb %cl, %cl		; X64-NEXT: addb %cl, %cl
; X64-NEXT: decb %cl		; X64-NEXT: decb %cl
		; X64-NEXT: subb %dl, %al
; X64-NEXT: shrb %al		; X64-NEXT: shrb %al
		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %cl		; X64-NEXT: mulb %cl
; X64-NEXT: addb %dil, %al		; X64-NEXT: addb %dil, %al
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X32-LABEL: scalar_i8_signed_reg_mem:		; X32-LABEL: scalar_i8_signed_reg_mem:
; X32: # %bb.0:		; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl		; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
Show All 27 Lines	; X32-NEXT: retl
%t9 = mul nsw i8 %t8, %t4 ; signed		%t9 = mul nsw i8 %t8, %t4 ; signed
%a10 = add nsw i8 %t9, %a1 ; signed		%a10 = add nsw i8 %t9, %a1 ; signed
ret i8 %a10		ret i8 %a10
}		}

define i8 @scalar_i8_signed_mem_mem(i8* %a1_addr, i8* %a2_addr) nounwind {		define i8 @scalar_i8_signed_mem_mem(i8* %a1_addr, i8* %a2_addr) nounwind {
; X64-LABEL: scalar_i8_signed_mem_mem:		; X64-LABEL: scalar_i8_signed_mem_mem:
; X64: # %bb.0:		; X64: # %bb.0:
; X64-NEXT: movb (%rdi), %dil		; X64-NEXT: movzbl (%rdi), %ecx
; X64-NEXT: movb (%rsi), %cl		; X64-NEXT: movzbl (%rsi), %eax
; X64-NEXT: cmpb %cl, %dil		; X64-NEXT: cmpb %al, %cl
; X64-NEXT: setle %dl		; X64-NEXT: setle %dl
; X64-NEXT: movl %ecx, %esi		; X64-NEXT: movl %ecx, %esi
; X64-NEXT: jg .LBB19_2		; X64-NEXT: cmovgl %eax, %esi
; X64-NEXT: # %bb.1:		; X64-NEXT: cmovgel %ecx, %eax
; X64-NEXT: movl %edi, %esi
; X64-NEXT: .LBB19_2:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: jge .LBB19_4
; X64-NEXT: # %bb.3:
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: .LBB19_4:
; X64-NEXT: subb %sil, %al
; X64-NEXT: addb %dl, %dl		; X64-NEXT: addb %dl, %dl
; X64-NEXT: decb %dl		; X64-NEXT: decb %dl
		; X64-NEXT: subb %sil, %al
; X64-NEXT: shrb %al		; X64-NEXT: shrb %al
		; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: mulb %dl		; X64-NEXT: mulb %dl
; X64-NEXT: addb %dil, %al		; X64-NEXT: addb %cl, %al
; X64-NEXT: retq		; X64-NEXT: retq
;		;
; X32-LABEL: scalar_i8_signed_mem_mem:		; X32-LABEL: scalar_i8_signed_mem_mem:
; X32: # %bb.0:		; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax		; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx		; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movb (%ecx), %cl		; X32-NEXT: movb (%ecx), %cl
; X32-NEXT: movb (%eax), %ah		; X32-NEXT: movb (%eax), %ah
Show All 31 Lines

test/CodeGen/X86/pr5145.ll

	; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py			; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
	; RUN: llc -mtriple=x86_64-- < %s \| FileCheck %s			; RUN: llc -mtriple=x86_64-- < %s \| FileCheck %s
	@sc8 = external global i8			@sc8 = external global i8

	define void @atomic_maxmin_i8() {			define void @atomic_maxmin_i8() {
	; CHECK-LABEL: atomic_maxmin_i8:			; CHECK-LABEL: atomic_maxmin_i8:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: movb {{.*}}(%rip), %al			; CHECK-NEXT: movb {{.*}}(%rip), %al
	; CHECK-NEXT: .p2align 4, 0x90			; CHECK-NEXT: .p2align 4, 0x90
	; CHECK-NEXT: .LBB0_1: # %atomicrmw.start			; CHECK-NEXT: .LBB0_1: # %atomicrmw.start
	; CHECK-NEXT: # =>This Inner Loop Header: Depth=1			; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
	; CHECK-NEXT: cmpb $4, %al			; CHECK-NEXT: cmpb $4, %al
	; CHECK-NEXT: movl %eax, %ecx			; CHECK-NEXT: movzbl %al, %eax
	; CHECK-NEXT: jg .LBB0_3			; CHECK-NEXT: movl $5, %ecx
	; CHECK-NEXT: # %bb.2: # %atomicrmw.start			; CHECK-NEXT: cmovgl %eax, %ecx
	; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1			; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: movb $5, %cl
	; CHECK-NEXT: .LBB0_3: # %atomicrmw.start
	; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
	; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)			; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
	; CHECK-NEXT: jne .LBB0_1			; CHECK-NEXT: jne .LBB0_1
	; CHECK-NEXT: # %bb.4: # %atomicrmw.end			; CHECK-NEXT: # %bb.2: # %atomicrmw.end
	; CHECK-NEXT: movb {{.*}}(%rip), %al			; CHECK-NEXT: movb {{.*}}(%rip), %al
	; CHECK-NEXT: .p2align 4, 0x90			; CHECK-NEXT: .p2align 4, 0x90
	; CHECK-NEXT: .LBB0_5: # %atomicrmw.start2			; CHECK-NEXT: .LBB0_3: # %atomicrmw.start2
	; CHECK-NEXT: # =>This Inner Loop Header: Depth=1			; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
	; CHECK-NEXT: cmpb $7, %al			; CHECK-NEXT: cmpb $7, %al
	; CHECK-NEXT: movl %eax, %ecx			; CHECK-NEXT: movzbl %al, %eax
	; CHECK-NEXT: jl .LBB0_7			; CHECK-NEXT: movl $6, %ecx
	; CHECK-NEXT: # %bb.6: # %atomicrmw.start2			; CHECK-NEXT: cmovll %eax, %ecx
	; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1			; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: movb $6, %cl
	; CHECK-NEXT: .LBB0_7: # %atomicrmw.start2
	; CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1
	; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)			; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
	; CHECK-NEXT: jne .LBB0_5			; CHECK-NEXT: jne .LBB0_3
	; CHECK-NEXT: # %bb.8: # %atomicrmw.end1			; CHECK-NEXT: # %bb.4: # %atomicrmw.end1
	; CHECK-NEXT: movb {{.*}}(%rip), %al			; CHECK-NEXT: movb {{.*}}(%rip), %al
	; CHECK-NEXT: .p2align 4, 0x90			; CHECK-NEXT: .p2align 4, 0x90
	; CHECK-NEXT: .LBB0_9: # %atomicrmw.start8			; CHECK-NEXT: .LBB0_5: # %atomicrmw.start8
	; CHECK-NEXT: # =>This Inner Loop Header: Depth=1			; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
	; CHECK-NEXT: cmpb $7, %al			; CHECK-NEXT: cmpb $7, %al
	; CHECK-NEXT: movl %eax, %ecx			; CHECK-NEXT: movzbl %al, %eax
	; CHECK-NEXT: ja .LBB0_11			; CHECK-NEXT: movl $7, %ecx
	; CHECK-NEXT: # %bb.10: # %atomicrmw.start8			; CHECK-NEXT: cmoval %eax, %ecx
	; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1			; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: movb $7, %cl
	; CHECK-NEXT: .LBB0_11: # %atomicrmw.start8
	; CHECK-NEXT: # in Loop: Header=BB0_9 Depth=1
	; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)			; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
	; CHECK-NEXT: jne .LBB0_9			; CHECK-NEXT: jne .LBB0_5
	; CHECK-NEXT: # %bb.12: # %atomicrmw.end7			; CHECK-NEXT: # %bb.6: # %atomicrmw.end7
	; CHECK-NEXT: movb {{.*}}(%rip), %al			; CHECK-NEXT: movb {{.*}}(%rip), %al
	; CHECK-NEXT: .p2align 4, 0x90			; CHECK-NEXT: .p2align 4, 0x90
	; CHECK-NEXT: .LBB0_13: # %atomicrmw.start14			; CHECK-NEXT: .LBB0_7: # %atomicrmw.start14
	; CHECK-NEXT: # =>This Inner Loop Header: Depth=1			; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
	; CHECK-NEXT: cmpb $9, %al			; CHECK-NEXT: cmpb $9, %al
	; CHECK-NEXT: movl %eax, %ecx			; CHECK-NEXT: movzbl %al, %eax
	; CHECK-NEXT: jb .LBB0_15			; CHECK-NEXT: movl $8, %ecx
	; CHECK-NEXT: # %bb.14: # %atomicrmw.start14			; CHECK-NEXT: cmovbl %eax, %ecx
	; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1			; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: movb $8, %cl
	; CHECK-NEXT: .LBB0_15: # %atomicrmw.start14
	; CHECK-NEXT: # in Loop: Header=BB0_13 Depth=1
	; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)			; CHECK-NEXT: lock cmpxchgb %cl, {{.*}}(%rip)
	; CHECK-NEXT: jne .LBB0_13			; CHECK-NEXT: jne .LBB0_7
	; CHECK-NEXT: # %bb.16: # %atomicrmw.end13			; CHECK-NEXT: # %bb.8: # %atomicrmw.end13
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%1 = atomicrmw max i8* @sc8, i8 5 acquire			%1 = atomicrmw max i8* @sc8, i8 5 acquire
	%2 = atomicrmw min i8* @sc8, i8 6 acquire			%2 = atomicrmw min i8* @sc8, i8 6 acquire
	%3 = atomicrmw umax i8* @sc8, i8 7 acquire			%3 = atomicrmw umax i8* @sc8, i8 7 acquire
	%4 = atomicrmw umin i8* @sc8, i8 8 acquire			%4 = atomicrmw umin i8* @sc8, i8 8 acquire
	ret void			ret void
	}			}

test/CodeGen/X86/sadd_sat.ll

	Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines

	define i4 @func3(i4 %x, i4 %y) nounwind {			define i4 @func3(i4 %x, i4 %y) nounwind {
	; X86-LABEL: func3:			; X86-LABEL: func3:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movb {{[0-9]+}}(%esp), %al			; X86-NEXT: movb {{[0-9]+}}(%esp), %al
	; X86-NEXT: movb {{[0-9]+}}(%esp), %dl			; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
	; X86-NEXT: shlb $4, %dl			; X86-NEXT: shlb $4, %dl
	; X86-NEXT: shlb $4, %al			; X86-NEXT: shlb $4, %al
	; X86-NEXT: movl %eax, %ecx			; X86-NEXT: xorl %ecx, %ecx
	; X86-NEXT: addb %dl, %cl			; X86-NEXT: movb %al, %ah
				; X86-NEXT: addb %dl, %ah
	; X86-NEXT: setns %cl			; X86-NEXT: setns %cl
				; X86-NEXT: addl $127, %ecx
	; X86-NEXT: addb %dl, %al			; X86-NEXT: addb %dl, %al
	; X86-NEXT: jno .LBB2_2			; X86-NEXT: movzbl %al, %eax
	; X86-NEXT: # %bb.1:			; X86-NEXT: cmovol %ecx, %eax
	; X86-NEXT: addb $127, %cl
	; X86-NEXT: movl %ecx, %eax
	; X86-NEXT: .LBB2_2:
	; X86-NEXT: sarb $4, %al			; X86-NEXT: sarb $4, %al
				; X86-NEXT: # kill: def $al killed $al killed $eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: func3:			; X64-LABEL: func3:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: shlb $4, %sil			; X64-NEXT: shlb $4, %sil
	; X64-NEXT: shlb $4, %al			; X64-NEXT: shlb $4, %dil
	; X64-NEXT: movl %eax, %ecx			; X64-NEXT: xorl %ecx, %ecx
	; X64-NEXT: addb %sil, %cl			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: setns %cl
	; X64-NEXT: addb %sil, %al			; X64-NEXT: addb %sil, %al
	; X64-NEXT: jno .LBB2_2			; X64-NEXT: setns %cl
	; X64-NEXT: # %bb.1:			; X64-NEXT: addl $127, %ecx
	; X64-NEXT: addb $127, %cl			; X64-NEXT: addb %sil, %dil
	; X64-NEXT: movl %ecx, %eax			; X64-NEXT: movzbl %dil, %eax
	; X64-NEXT: .LBB2_2:			; X64-NEXT: cmovol %ecx, %eax
	; X64-NEXT: sarb $4, %al			; X64-NEXT: sarb $4, %al
	; X64-NEXT: # kill: def $al killed $al killed $eax			; X64-NEXT: # kill: def $al killed $al killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y);			%tmp = call i4 @llvm.sadd.sat.i4(i4 %x, i4 %y);
	ret i4 %tmp;			ret i4 %tmp;
	}			}

	define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {			define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
	▲ Show 20 Lines • Show All 81 Lines • Show Last 20 Lines

test/CodeGen/X86/sadd_sat_vec.ll

	Show First 20 Lines • Show All 493 Lines • ▼ Show 20 Lines

	; Scalarization			; Scalarization

	define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {			define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
	; SSE-LABEL: v1i8:			; SSE-LABEL: v1i8:
	; SSE: # %bb.0:			; SSE: # %bb.0:
	; SSE-NEXT: movb (%rdi), %cl			; SSE-NEXT: movb (%rdi), %cl
	; SSE-NEXT: movb (%rsi), %dil			; SSE-NEXT: movb (%rsi), %dil
				; SSE-NEXT: xorl %esi, %esi
	; SSE-NEXT: movl %ecx, %eax			; SSE-NEXT: movl %ecx, %eax
	; SSE-NEXT: addb %dil, %al			; SSE-NEXT: addb %dil, %al
	; SSE-NEXT: setns %sil			; SSE-NEXT: setns %sil
				; SSE-NEXT: addl $127, %esi
	; SSE-NEXT: addb %dil, %cl			; SSE-NEXT: addb %dil, %cl
	; SSE-NEXT: jno .LBB13_2			; SSE-NEXT: movzbl %cl, %eax
	; SSE-NEXT: # %bb.1:			; SSE-NEXT: cmovol %esi, %eax
	; SSE-NEXT: addb $127, %sil			; SSE-NEXT: movb %al, (%rdx)
	; SSE-NEXT: movl %esi, %ecx
	; SSE-NEXT: .LBB13_2:
	; SSE-NEXT: movb %cl, (%rdx)
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: v1i8:			; AVX-LABEL: v1i8:
	; AVX: # %bb.0:			; AVX: # %bb.0:
	; AVX-NEXT: movb (%rdi), %cl			; AVX-NEXT: movb (%rdi), %cl
	; AVX-NEXT: movb (%rsi), %dil			; AVX-NEXT: movb (%rsi), %dil
				; AVX-NEXT: xorl %esi, %esi
	; AVX-NEXT: movl %ecx, %eax			; AVX-NEXT: movl %ecx, %eax
	; AVX-NEXT: addb %dil, %al			; AVX-NEXT: addb %dil, %al
	; AVX-NEXT: setns %sil			; AVX-NEXT: setns %sil
				; AVX-NEXT: addl $127, %esi
	; AVX-NEXT: addb %dil, %cl			; AVX-NEXT: addb %dil, %cl
	; AVX-NEXT: jno .LBB13_2			; AVX-NEXT: movzbl %cl, %eax
	; AVX-NEXT: # %bb.1:			; AVX-NEXT: cmovol %esi, %eax
	; AVX-NEXT: addb $127, %sil			; AVX-NEXT: movb %al, (%rdx)
	; AVX-NEXT: movl %esi, %ecx
	; AVX-NEXT: .LBB13_2:
	; AVX-NEXT: movb %cl, (%rdx)
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%x = load <1 x i8>, <1 x i8>* %px			%x = load <1 x i8>, <1 x i8>* %px
	%y = load <1 x i8>, <1 x i8>* %py			%y = load <1 x i8>, <1 x i8>* %py
	%z = call <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y)			%z = call <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
	store <1 x i8> %z, <1 x i8>* %pz			store <1 x i8> %z, <1 x i8>* %pz
	ret void			ret void
	}			}

	▲ Show 20 Lines • Show All 2,539 Lines • Show Last 20 Lines

test/CodeGen/X86/sat-add.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py		; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 \| FileCheck %s --check-prefixes=ANY,SSE,SSE2		; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 \| FileCheck %s --check-prefixes=ANY,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 \| FileCheck %s --check-prefixes=ANY,SSE,SSE41		; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 \| FileCheck %s --check-prefixes=ANY,SSE,SSE41

; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.		; There are at least 3 potential patterns corresponding to an unsigned saturated add: min, cmp with sum, cmp with not.
; Test each of those patterns with i8/i16/i32/i64.		; Test each of those patterns with i8/i16/i32/i64.
; Test each of those with a constant operand and a variable operand.		; Test each of those with a constant operand and a variable operand.
; Test each of those with a 128-bit vector type.		; Test each of those with a 128-bit vector type.

define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {		define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_min:		; ANY-LABEL: unsigned_sat_constant_i8_using_min:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: movl %edi, %eax		; ANY-NEXT: cmpb $-43, %dil
; ANY-NEXT: cmpb $-43, %al		; ANY-NEXT: movl $213, %eax
; ANY-NEXT: jb .LBB0_2		; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: # %bb.1:
; ANY-NEXT: movb $-43, %al
; ANY-NEXT: .LBB0_2:
; ANY-NEXT: addb $42, %al		; ANY-NEXT: addb $42, %al
; ANY-NEXT: # kill: def $al killed $al killed $eax		; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: retq		; ANY-NEXT: retq
%c = icmp ult i8 %x, -43		%c = icmp ult i8 %x, -43
%s = select i1 %c, i8 %x, i8 -43		%s = select i1 %c, i8 %x, i8 -43
%r = add i8 %s, 42		%r = add i8 %s, 42
ret i8 %r		ret i8 %r
}		}

define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {		define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:		; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: addb $42, %dil		; ANY-NEXT: addb $42, %dil
; ANY-NEXT: movb $-1, %al		; ANY-NEXT: movzbl %dil, %ecx
; ANY-NEXT: jb .LBB1_2		; ANY-NEXT: movl $255, %eax
; ANY-NEXT: # %bb.1:		; ANY-NEXT: cmovael %ecx, %eax
; ANY-NEXT: movl %edi, %eax		; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: .LBB1_2:
; ANY-NEXT: retq		; ANY-NEXT: retq
%a = add i8 %x, 42		%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a		%c = icmp ugt i8 %x, %a
%r = select i1 %c, i8 -1, i8 %a		%r = select i1 %c, i8 -1, i8 %a
ret i8 %r		ret i8 %r
}		}

define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {		define i8 @unsigned_sat_constant_i8_using_cmp_notval(i8 %x) {
; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:		; ANY-LABEL: unsigned_sat_constant_i8_using_cmp_notval:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: addb $42, %dil		; ANY-NEXT: addb $42, %dil
; ANY-NEXT: movb $-1, %al		; ANY-NEXT: movzbl %dil, %ecx
; ANY-NEXT: jb .LBB2_2		; ANY-NEXT: movl $255, %eax
; ANY-NEXT: # %bb.1:		; ANY-NEXT: cmovael %ecx, %eax
; ANY-NEXT: movl %edi, %eax		; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: .LBB2_2:
; ANY-NEXT: retq		; ANY-NEXT: retq
%a = add i8 %x, 42		%a = add i8 %x, 42
%c = icmp ugt i8 %x, -43		%c = icmp ugt i8 %x, -43
%r = select i1 %c, i8 -1, i8 %a		%r = select i1 %c, i8 -1, i8 %a
ret i8 %r		ret i8 %r
}		}

define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {		define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
▲ Show 20 Lines • Show All 117 Lines • ▼ Show 20 Lines	; ANY-NEXT: retq
%c = icmp ugt i64 %x, -43		%c = icmp ugt i64 %x, -43
%r = select i1 %c, i64 -1, i64 %a		%r = select i1 %c, i64 -1, i64 %a
ret i64 %r		ret i64 %r
}		}

define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {		define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_min:		; ANY-LABEL: unsigned_sat_variable_i8_using_min:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: movl %edi, %eax		; ANY-NEXT: movl %esi, %eax
; ANY-NEXT: movl %esi, %ecx		; ANY-NEXT: notb %al
; ANY-NEXT: notb %cl		; ANY-NEXT: cmpb %al, %dil
; ANY-NEXT: cmpb %cl, %al		; ANY-NEXT: movzbl %al, %eax
; ANY-NEXT: jb .LBB12_2		; ANY-NEXT: cmovbl %edi, %eax
; ANY-NEXT: # %bb.1:
; ANY-NEXT: movl %ecx, %eax
; ANY-NEXT: .LBB12_2:
; ANY-NEXT: addb %sil, %al		; ANY-NEXT: addb %sil, %al
; ANY-NEXT: # kill: def $al killed $al killed $eax		; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: retq		; ANY-NEXT: retq
%noty = xor i8 %y, -1		%noty = xor i8 %y, -1
%c = icmp ult i8 %x, %noty		%c = icmp ult i8 %x, %noty
%s = select i1 %c, i8 %x, i8 %noty		%s = select i1 %c, i8 %x, i8 %noty
%r = add i8 %s, %y		%r = add i8 %s, %y
ret i8 %r		ret i8 %r
}		}

define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {		define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:		; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: addb %sil, %dil		; ANY-NEXT: addb %sil, %dil
; ANY-NEXT: movb $-1, %al		; ANY-NEXT: movzbl %dil, %ecx
; ANY-NEXT: jb .LBB13_2		; ANY-NEXT: movl $255, %eax
; ANY-NEXT: # %bb.1:		; ANY-NEXT: cmovael %ecx, %eax
; ANY-NEXT: movl %edi, %eax		; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: .LBB13_2:
; ANY-NEXT: retq		; ANY-NEXT: retq
%a = add i8 %x, %y		%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a		%c = icmp ugt i8 %x, %a
%r = select i1 %c, i8 -1, i8 %a		%r = select i1 %c, i8 -1, i8 %a
ret i8 %r		ret i8 %r
}		}

define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {		define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:		; ANY-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; ANY: # %bb.0:		; ANY: # %bb.0:
; ANY-NEXT: movl %esi, %eax		; ANY-NEXT: # kill: def $esi killed $esi def $rsi
; ANY-NEXT: notb %al		; ANY-NEXT: # kill: def $edi killed $edi def $rdi
; ANY-NEXT: cmpb %al, %dil		; ANY-NEXT: leal (%rdi,%rsi), %eax
; ANY-NEXT: movb $-1, %al		; ANY-NEXT: notb %sil
; ANY-NEXT: ja .LBB14_2		; ANY-NEXT: cmpb %sil, %dil
; ANY-NEXT: # %bb.1:		; ANY-NEXT: movzbl %al, %ecx
; ANY-NEXT: addb %sil, %dil		; ANY-NEXT: movl $255, %eax
; ANY-NEXT: movl %edi, %eax		; ANY-NEXT: cmovbel %ecx, %eax
; ANY-NEXT: .LBB14_2:		; ANY-NEXT: # kill: def $al killed $al killed $eax
; ANY-NEXT: retq		; ANY-NEXT: retq
%noty = xor i8 %y, -1		%noty = xor i8 %y, -1
%a = add i8 %x, %y		%a = add i8 %x, %y
%c = icmp ugt i8 %x, %noty		%c = icmp ugt i8 %x, %noty
%r = select i1 %c, i8 -1, i8 %a		%r = select i1 %c, i8 -1, i8 %a
ret i8 %r		ret i8 %r
}		}

▲ Show 20 Lines • Show All 713 Lines • Show Last 20 Lines

test/CodeGen/X86/select.ll

	Show First 20 Lines • Show All 1,130 Lines • ▼ Show 20 Lines
	; reproducer for pr29002			; reproducer for pr29002
	define void @clamp_i8(i32 %src, i8* %dst) {			define void @clamp_i8(i32 %src, i8* %dst) {
	; GENERIC-LABEL: clamp_i8:			; GENERIC-LABEL: clamp_i8:
	; GENERIC: ## %bb.0:			; GENERIC: ## %bb.0:
	; GENERIC-NEXT: cmpl $127, %edi			; GENERIC-NEXT: cmpl $127, %edi
	; GENERIC-NEXT: movl $127, %eax			; GENERIC-NEXT: movl $127, %eax
	; GENERIC-NEXT: cmovlel %edi, %eax			; GENERIC-NEXT: cmovlel %edi, %eax
	; GENERIC-NEXT: cmpl $-128, %eax			; GENERIC-NEXT: cmpl $-128, %eax
	; GENERIC-NEXT: movb $-128, %cl			; GENERIC-NEXT: movl $128, %ecx
	; GENERIC-NEXT: jl LBB21_2			; GENERIC-NEXT: cmovgel %eax, %ecx
	; GENERIC-NEXT: ## %bb.1:
	; GENERIC-NEXT: movl %eax, %ecx
	; GENERIC-NEXT: LBB21_2:
	; GENERIC-NEXT: movb %cl, (%rsi)			; GENERIC-NEXT: movb %cl, (%rsi)
	; GENERIC-NEXT: retq			; GENERIC-NEXT: retq
	;			;
	; ATOM-LABEL: clamp_i8:			; ATOM-LABEL: clamp_i8:
	; ATOM: ## %bb.0:			; ATOM: ## %bb.0:
	; ATOM-NEXT: cmpl $127, %edi			; ATOM-NEXT: cmpl $127, %edi
	; ATOM-NEXT: movl $127, %eax			; ATOM-NEXT: movl $127, %eax
	; ATOM-NEXT: movb $-128, %cl			; ATOM-NEXT: movl $128, %ecx
	; ATOM-NEXT: cmovlel %edi, %eax			; ATOM-NEXT: cmovlel %edi, %eax
	; ATOM-NEXT: cmpl $-128, %eax			; ATOM-NEXT: cmpl $-128, %eax
	; ATOM-NEXT: jl LBB21_2			; ATOM-NEXT: cmovgel %eax, %ecx
	; ATOM-NEXT: ## %bb.1:
	; ATOM-NEXT: movl %eax, %ecx
	; ATOM-NEXT: LBB21_2:
	; ATOM-NEXT: movb %cl, (%rsi)			; ATOM-NEXT: movb %cl, (%rsi)
	; ATOM-NEXT: retq			; ATOM-NEXT: retq
	;			;
	; ATHLON-LABEL: clamp_i8:			; ATHLON-LABEL: clamp_i8:
	; ATHLON: ## %bb.0:			; ATHLON: ## %bb.0:
	; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax			; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
	; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %edx			; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
	; ATHLON-NEXT: cmpl $127, %edx			; ATHLON-NEXT: cmpl $127, %ecx
	; ATHLON-NEXT: movl $127, %ecx			; ATHLON-NEXT: movl $127, %edx
	; ATHLON-NEXT: cmovlel %edx, %ecx			; ATHLON-NEXT: cmovlel %ecx, %edx
	; ATHLON-NEXT: cmpl $-128, %ecx			; ATHLON-NEXT: cmpl $-128, %edx
	; ATHLON-NEXT: movb $-128, %dl			; ATHLON-NEXT: movl $128, %ecx
	; ATHLON-NEXT: jl LBB21_2			; ATHLON-NEXT: cmovgel %edx, %ecx
	; ATHLON-NEXT: ## %bb.1:			; ATHLON-NEXT: movb %cl, (%eax)
	; ATHLON-NEXT: movl %ecx, %edx
	; ATHLON-NEXT: LBB21_2:
	; ATHLON-NEXT: movb %dl, (%eax)
	; ATHLON-NEXT: retl			; ATHLON-NEXT: retl
	;			;
	; MCU-LABEL: clamp_i8:			; MCU-LABEL: clamp_i8:
	; MCU: # %bb.0:			; MCU: # %bb.0:
	; MCU-NEXT: cmpl $127, %eax			; MCU-NEXT: cmpl $127, %eax
	; MCU-NEXT: movl $127, %ecx			; MCU-NEXT: movl $127, %ecx
	; MCU-NEXT: jg .LBB21_2			; MCU-NEXT: jg .LBB21_2
	; MCU-NEXT: # %bb.1:			; MCU-NEXT: # %bb.1:
	▲ Show 20 Lines • Show All 462 Lines • Show Last 20 Lines

test/CodeGen/X86/select_const.ll

	Show First 20 Lines • Show All 373 Lines • ▼ Show 20 Lines
	}			}

	; This doesn't need a branch, but don't do the wrong thing if subtraction of the constants overflows.			; This doesn't need a branch, but don't do the wrong thing if subtraction of the constants overflows.

	define i8 @sel_67_neg125(i32 %x) {			define i8 @sel_67_neg125(i32 %x) {
	; CHECK-LABEL: sel_67_neg125:			; CHECK-LABEL: sel_67_neg125:
	; CHECK: # %bb.0:			; CHECK: # %bb.0:
	; CHECK-NEXT: cmpl $42, %edi			; CHECK-NEXT: cmpl $42, %edi
	; CHECK-NEXT: movb $67, %al			; CHECK-NEXT: movl $67, %ecx
	; CHECK-NEXT: jg .LBB31_2			; CHECK-NEXT: movl $131, %eax
	; CHECK-NEXT: # %bb.1:			; CHECK-NEXT: cmovgl %ecx, %eax
	; CHECK-NEXT: movb $-125, %al			; CHECK-NEXT: # kill: def $al killed $al killed $eax
	; CHECK-NEXT: .LBB31_2:
	; CHECK-NEXT: retq			; CHECK-NEXT: retq
	%cmp = icmp sgt i32 %x, 42			%cmp = icmp sgt i32 %x, 42
	%sel = select i1 %cmp, i8 67, i8 -125			%sel = select i1 %cmp, i8 67, i8 -125
	ret i8 %sel			ret i8 %sel
	}			}


	; In general, select of 2 constants could be:			; In general, select of 2 constants could be:
	▲ Show 20 Lines • Show All 119 Lines • Show Last 20 Lines

test/CodeGen/X86/ssub_sat.ll

	Show First 20 Lines • Show All 90 Lines • ▼ Show 20 Lines

	define i4 @func3(i4 %x, i4 %y) nounwind {			define i4 @func3(i4 %x, i4 %y) nounwind {
	; X86-LABEL: func3:			; X86-LABEL: func3:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movb {{[0-9]+}}(%esp), %al			; X86-NEXT: movb {{[0-9]+}}(%esp), %al
	; X86-NEXT: movb {{[0-9]+}}(%esp), %dl			; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
	; X86-NEXT: shlb $4, %dl			; X86-NEXT: shlb $4, %dl
	; X86-NEXT: shlb $4, %al			; X86-NEXT: shlb $4, %al
	; X86-NEXT: movl %eax, %ecx			; X86-NEXT: xorl %ecx, %ecx
	; X86-NEXT: subb %dl, %cl			; X86-NEXT: movb %al, %ah
				; X86-NEXT: subb %dl, %ah
	; X86-NEXT: setns %cl			; X86-NEXT: setns %cl
				; X86-NEXT: addl $127, %ecx
	; X86-NEXT: subb %dl, %al			; X86-NEXT: subb %dl, %al
	; X86-NEXT: jno .LBB2_2			; X86-NEXT: movzbl %al, %eax
	; X86-NEXT: # %bb.1:			; X86-NEXT: cmovol %ecx, %eax
	; X86-NEXT: addb $127, %cl
	; X86-NEXT: movl %ecx, %eax
	; X86-NEXT: .LBB2_2:
	; X86-NEXT: sarb $4, %al			; X86-NEXT: sarb $4, %al
				; X86-NEXT: # kill: def $al killed $al killed $eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: func3:			; X64-LABEL: func3:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: shlb $4, %sil			; X64-NEXT: shlb $4, %sil
	; X64-NEXT: shlb $4, %al			; X64-NEXT: shlb $4, %dil
	; X64-NEXT: movl %eax, %ecx			; X64-NEXT: xorl %ecx, %ecx
	; X64-NEXT: subb %sil, %cl			; X64-NEXT: movl %edi, %eax
	; X64-NEXT: setns %cl
	; X64-NEXT: subb %sil, %al			; X64-NEXT: subb %sil, %al
	; X64-NEXT: jno .LBB2_2			; X64-NEXT: setns %cl
	; X64-NEXT: # %bb.1:			; X64-NEXT: addl $127, %ecx
	; X64-NEXT: addb $127, %cl			; X64-NEXT: subb %sil, %dil
	; X64-NEXT: movl %ecx, %eax			; X64-NEXT: movzbl %dil, %eax
	; X64-NEXT: .LBB2_2:			; X64-NEXT: cmovol %ecx, %eax
	; X64-NEXT: sarb $4, %al			; X64-NEXT: sarb $4, %al
	; X64-NEXT: # kill: def $al killed $al killed $eax			; X64-NEXT: # kill: def $al killed $al killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y);			%tmp = call i4 @llvm.ssub.sat.i4(i4 %x, i4 %y);
	ret i4 %tmp;			ret i4 %tmp;
	}			}

	define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {			define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
	▲ Show 20 Lines • Show All 83 Lines • Show Last 20 Lines

test/CodeGen/X86/ssub_sat_vec.ll

	Show First 20 Lines • Show All 493 Lines • ▼ Show 20 Lines

	; Scalarization			; Scalarization

	define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {			define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
	; SSE-LABEL: v1i8:			; SSE-LABEL: v1i8:
	; SSE: # %bb.0:			; SSE: # %bb.0:
	; SSE-NEXT: movb (%rdi), %cl			; SSE-NEXT: movb (%rdi), %cl
	; SSE-NEXT: movb (%rsi), %dil			; SSE-NEXT: movb (%rsi), %dil
				; SSE-NEXT: xorl %esi, %esi
	; SSE-NEXT: movl %ecx, %eax			; SSE-NEXT: movl %ecx, %eax
	; SSE-NEXT: subb %dil, %al			; SSE-NEXT: subb %dil, %al
	; SSE-NEXT: setns %sil			; SSE-NEXT: setns %sil
				; SSE-NEXT: addl $127, %esi
	; SSE-NEXT: subb %dil, %cl			; SSE-NEXT: subb %dil, %cl
	; SSE-NEXT: jno .LBB13_2			; SSE-NEXT: movzbl %cl, %eax
	; SSE-NEXT: # %bb.1:			; SSE-NEXT: cmovol %esi, %eax
	; SSE-NEXT: addb $127, %sil			; SSE-NEXT: movb %al, (%rdx)
	; SSE-NEXT: movl %esi, %ecx
	; SSE-NEXT: .LBB13_2:
	; SSE-NEXT: movb %cl, (%rdx)
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: v1i8:			; AVX-LABEL: v1i8:
	; AVX: # %bb.0:			; AVX: # %bb.0:
	; AVX-NEXT: movb (%rdi), %cl			; AVX-NEXT: movb (%rdi), %cl
	; AVX-NEXT: movb (%rsi), %dil			; AVX-NEXT: movb (%rsi), %dil
				; AVX-NEXT: xorl %esi, %esi
	; AVX-NEXT: movl %ecx, %eax			; AVX-NEXT: movl %ecx, %eax
	; AVX-NEXT: subb %dil, %al			; AVX-NEXT: subb %dil, %al
	; AVX-NEXT: setns %sil			; AVX-NEXT: setns %sil
				; AVX-NEXT: addl $127, %esi
	; AVX-NEXT: subb %dil, %cl			; AVX-NEXT: subb %dil, %cl
	; AVX-NEXT: jno .LBB13_2			; AVX-NEXT: movzbl %cl, %eax
	; AVX-NEXT: # %bb.1:			; AVX-NEXT: cmovol %esi, %eax
	; AVX-NEXT: addb $127, %sil			; AVX-NEXT: movb %al, (%rdx)
	; AVX-NEXT: movl %esi, %ecx
	; AVX-NEXT: .LBB13_2:
	; AVX-NEXT: movb %cl, (%rdx)
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%x = load <1 x i8>, <1 x i8>* %px			%x = load <1 x i8>, <1 x i8>* %px
	%y = load <1 x i8>, <1 x i8>* %py			%y = load <1 x i8>, <1 x i8>* %py
	%z = call <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8> %x, <1 x i8> %y)			%z = call <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
	store <1 x i8> %z, <1 x i8>* %pz			store <1 x i8> %z, <1 x i8>* %pz
	ret void			ret void
	}			}

	▲ Show 20 Lines • Show All 2,623 Lines • Show Last 20 Lines

test/CodeGen/X86/uadd_sat.ll

	Show First 20 Lines • Show All 45 Lines • ▼ Show 20 Lines
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = call i64 @llvm.uadd.sat.i64(i64 %x, i64 %y);			%tmp = call i64 @llvm.uadd.sat.i64(i64 %x, i64 %y);
	ret i64 %tmp;			ret i64 %tmp;
	}			}

	define i4 @func3(i4 %x, i4 %y) nounwind {			define i4 @func3(i4 %x, i4 %y) nounwind {
	; X86-LABEL: func3:			; X86-LABEL: func3:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
	; X86-NEXT: movb {{[0-9]+}}(%esp), %al			; X86-NEXT: movb {{[0-9]+}}(%esp), %al
	; X86-NEXT: shlb $4, %al			; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
	; X86-NEXT: shlb $4, %cl			; X86-NEXT: shlb $4, %cl
	; X86-NEXT: addb %al, %cl			; X86-NEXT: shlb $4, %al
	; X86-NEXT: movb $-1, %al			; X86-NEXT: addb %cl, %al
	; X86-NEXT: jb .LBB2_2			; X86-NEXT: movzbl %al, %ecx
	; X86-NEXT: # %bb.1:			; X86-NEXT: movl $255, %eax
	; X86-NEXT: movl %ecx, %eax			; X86-NEXT: cmovael %ecx, %eax
	; X86-NEXT: .LBB2_2:
	; X86-NEXT: shrb $4, %al			; X86-NEXT: shrb $4, %al
				; X86-NEXT: # kill: def $al killed $al killed $eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: func3:			; X64-LABEL: func3:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: shlb $4, %sil			; X64-NEXT: shlb $4, %sil
	; X64-NEXT: shlb $4, %dil			; X64-NEXT: shlb $4, %dil
	; X64-NEXT: addb %sil, %dil			; X64-NEXT: addb %sil, %dil
	; X64-NEXT: movb $-1, %al			; X64-NEXT: movzbl %dil, %ecx
	; X64-NEXT: jb .LBB2_2			; X64-NEXT: movl $255, %eax
	; X64-NEXT: # %bb.1:			; X64-NEXT: cmovael %ecx, %eax
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: .LBB2_2:
	; X64-NEXT: shrb $4, %al			; X64-NEXT: shrb $4, %al
				; X64-NEXT: # kill: def $al killed $al killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y);			%tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y);
	ret i4 %tmp;			ret i4 %tmp;
	}			}

	define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {			define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
	; X86-LABEL: vec:			; X86-LABEL: vec:
	; X86: # %bb.0:			; X86: # %bb.0:
	▲ Show 20 Lines • Show All 44 Lines • Show Last 20 Lines

test/CodeGen/X86/uadd_sat_vec.ll

	Show First 20 Lines • Show All 493 Lines • ▼ Show 20 Lines

	; Scalarization			; Scalarization

	define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {			define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
	; SSE-LABEL: v1i8:			; SSE-LABEL: v1i8:
	; SSE: # %bb.0:			; SSE: # %bb.0:
	; SSE-NEXT: movb (%rdi), %al			; SSE-NEXT: movb (%rdi), %al
	; SSE-NEXT: addb (%rsi), %al			; SSE-NEXT: addb (%rsi), %al
	; SSE-NEXT: movb $-1, %cl			; SSE-NEXT: movzbl %al, %eax
	; SSE-NEXT: jb .LBB13_2			; SSE-NEXT: movl $255, %ecx
	; SSE-NEXT: # %bb.1:			; SSE-NEXT: cmovael %eax, %ecx
	; SSE-NEXT: movl %eax, %ecx
	; SSE-NEXT: .LBB13_2:
	; SSE-NEXT: movb %cl, (%rdx)			; SSE-NEXT: movb %cl, (%rdx)
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: v1i8:			; AVX-LABEL: v1i8:
	; AVX: # %bb.0:			; AVX: # %bb.0:
	; AVX-NEXT: movb (%rdi), %al			; AVX-NEXT: movb (%rdi), %al
	; AVX-NEXT: addb (%rsi), %al			; AVX-NEXT: addb (%rsi), %al
	; AVX-NEXT: movb $-1, %cl			; AVX-NEXT: movzbl %al, %eax
	; AVX-NEXT: jb .LBB13_2			; AVX-NEXT: movl $255, %ecx
	; AVX-NEXT: # %bb.1:			; AVX-NEXT: cmovael %eax, %ecx
	; AVX-NEXT: movl %eax, %ecx
	; AVX-NEXT: .LBB13_2:
	; AVX-NEXT: movb %cl, (%rdx)			; AVX-NEXT: movb %cl, (%rdx)
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%x = load <1 x i8>, <1 x i8>* %px			%x = load <1 x i8>, <1 x i8>* %px
	%y = load <1 x i8>, <1 x i8>* %py			%y = load <1 x i8>, <1 x i8>* %py
	%z = call <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y)			%z = call <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
	store <1 x i8> %z, <1 x i8>* %pz			store <1 x i8> %z, <1 x i8>* %pz
	ret void			ret void
	}			}
	▲ Show 20 Lines • Show All 1,159 Lines • Show Last 20 Lines

test/CodeGen/X86/usub_sat.ll

	Show First 20 Lines • Show All 49 Lines • ▼ Show 20 Lines

	define i4 @func3(i4 %x, i4 %y) nounwind {			define i4 @func3(i4 %x, i4 %y) nounwind {
	; X86-LABEL: func3:			; X86-LABEL: func3:
	; X86: # %bb.0:			; X86: # %bb.0:
	; X86-NEXT: movb {{[0-9]+}}(%esp), %al			; X86-NEXT: movb {{[0-9]+}}(%esp), %al
	; X86-NEXT: movb {{[0-9]+}}(%esp), %cl			; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
	; X86-NEXT: shlb $4, %cl			; X86-NEXT: shlb $4, %cl
	; X86-NEXT: shlb $4, %al			; X86-NEXT: shlb $4, %al
				; X86-NEXT: xorl %edx, %edx
	; X86-NEXT: subb %cl, %al			; X86-NEXT: subb %cl, %al
	; X86-NEXT: jae .LBB2_2			; X86-NEXT: movzbl %al, %eax
	; X86-NEXT: # %bb.1:			; X86-NEXT: cmovbl %edx, %eax
	; X86-NEXT: xorl %eax, %eax
	; X86-NEXT: .LBB2_2:
	; X86-NEXT: shrb $4, %al			; X86-NEXT: shrb $4, %al
	; X86-NEXT: # kill: def $al killed $al killed $eax			; X86-NEXT: # kill: def $al killed $al killed $eax
	; X86-NEXT: retl			; X86-NEXT: retl
	;			;
	; X64-LABEL: func3:			; X64-LABEL: func3:
	; X64: # %bb.0:			; X64: # %bb.0:
	; X64-NEXT: movl %edi, %eax
	; X64-NEXT: shlb $4, %sil			; X64-NEXT: shlb $4, %sil
	; X64-NEXT: shlb $4, %al			; X64-NEXT: shlb $4, %dil
	; X64-NEXT: subb %sil, %al			; X64-NEXT: xorl %ecx, %ecx
	; X64-NEXT: jae .LBB2_2			; X64-NEXT: subb %sil, %dil
	; X64-NEXT: # %bb.1:			; X64-NEXT: movzbl %dil, %eax
	; X64-NEXT: xorl %eax, %eax			; X64-NEXT: cmovbl %ecx, %eax
	; X64-NEXT: .LBB2_2:
	; X64-NEXT: shrb $4, %al			; X64-NEXT: shrb $4, %al
	; X64-NEXT: # kill: def $al killed $al killed $eax			; X64-NEXT: # kill: def $al killed $al killed $eax
	; X64-NEXT: retq			; X64-NEXT: retq
	%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);			%tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %y);
	ret i4 %tmp;			ret i4 %tmp;
	}			}

	define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {			define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
	▲ Show 20 Lines • Show All 43 Lines • Show Last 20 Lines

test/CodeGen/X86/usub_sat_vec.ll

	Show First 20 Lines • Show All 492 Lines • ▼ Show 20 Lines
	}			}

	; Scalarization			; Scalarization

	define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {			define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind {
	; SSE-LABEL: v1i8:			; SSE-LABEL: v1i8:
	; SSE: # %bb.0:			; SSE: # %bb.0:
	; SSE-NEXT: movb (%rdi), %al			; SSE-NEXT: movb (%rdi), %al
				; SSE-NEXT: xorl %ecx, %ecx
	; SSE-NEXT: subb (%rsi), %al			; SSE-NEXT: subb (%rsi), %al
	; SSE-NEXT: jae .LBB13_2			; SSE-NEXT: movzbl %al, %eax
	; SSE-NEXT: # %bb.1:			; SSE-NEXT: cmovbl %ecx, %eax
	; SSE-NEXT: xorl %eax, %eax
	; SSE-NEXT: .LBB13_2:
	; SSE-NEXT: movb %al, (%rdx)			; SSE-NEXT: movb %al, (%rdx)
	; SSE-NEXT: retq			; SSE-NEXT: retq
	;			;
	; AVX-LABEL: v1i8:			; AVX-LABEL: v1i8:
	; AVX: # %bb.0:			; AVX: # %bb.0:
	; AVX-NEXT: movb (%rdi), %al			; AVX-NEXT: movb (%rdi), %al
				; AVX-NEXT: xorl %ecx, %ecx
	; AVX-NEXT: subb (%rsi), %al			; AVX-NEXT: subb (%rsi), %al
	; AVX-NEXT: jae .LBB13_2			; AVX-NEXT: movzbl %al, %eax
	; AVX-NEXT: # %bb.1:			; AVX-NEXT: cmovbl %ecx, %eax
	; AVX-NEXT: xorl %eax, %eax
	; AVX-NEXT: .LBB13_2:
	; AVX-NEXT: movb %al, (%rdx)			; AVX-NEXT: movb %al, (%rdx)
	; AVX-NEXT: retq			; AVX-NEXT: retq
	%x = load <1 x i8>, <1 x i8>* %px			%x = load <1 x i8>, <1 x i8>* %px
	%y = load <1 x i8>, <1 x i8>* %py			%y = load <1 x i8>, <1 x i8>* %py
	%z = call <1 x i8> @llvm.usub.sat.v1i8(<1 x i8> %x, <1 x i8> %y)			%z = call <1 x i8> @llvm.usub.sat.v1i8(<1 x i8> %x, <1 x i8> %y)
	store <1 x i8> %z, <1 x i8>* %pz			store <1 x i8> %z, <1 x i8>* %pz
	ret void			ret void
	}			}
	▲ Show 20 Lines • Show All 1,005 Lines • Show Last 20 Lines

test/CodeGen/X86/x86-shrink-wrapping.ll

	Show First 20 Lines • Show All 67 Lines • ▼ Show 20 Lines
	declare i32 @doSomething(i32, i32*)			declare i32 @doSomething(i32, i32*)


	; Check that we do not perform the restore inside the loop whereas the save			; Check that we do not perform the restore inside the loop whereas the save
	; is outside.			; is outside.
	; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:			; CHECK-LABEL: freqSaveAndRestoreOutsideLoop:
	;			;
	; Shrink-wrapping allows to skip the prologue in the else case.			; Shrink-wrapping allows to skip the prologue in the else case.
	; ENABLE: testl %edi, %edi			; ENABLE: testl %edi, %edi
	; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]			; ENABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
	;			;
	; Prologue code.			; Prologue code.
	; Make sure we save the CSR used in the inline asm: rbx.			; Make sure we save the CSR used in the inline asm: rbx.
	; CHECK: pushq %rbx			; CHECK: pushq %rbx
	;			;
	; DISABLE: testl %edi, %edi			; DISABLE: testl %edi, %edi
	; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]			; DISABLE: je [[ELSE_LABEL:LBB[0-9_]+]]
	▲ Show 20 Lines • Show All 418 Lines • ▼ Show 20 Lines
	%struct.language_function = type opaque			%struct.language_function = type opaque
	%struct.lang_decl = type opaque			%struct.lang_decl = type opaque
	%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }			%struct.rtx_def = type { i32, [1 x %union.rtunion_def] }
	%union.rtunion_def = type { i64 }			%union.rtunion_def = type { i64 }

	declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)			declare hidden fastcc %struct.temp_slot* @find_temp_slot_from_address(%struct.rtx_def* readonly)

	; CHECK-LABEL: useLEA:			; CHECK-LABEL: useLEA:
	; DISABLE: pushq			; DISABLE: pushq
	;			;
	; CHECK: testq %rdi, %rdi			; CHECK: testq %rdi, %rdi
	; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]			; CHECK-NEXT: je [[CLEANUP:LBB[0-9_]+]]
	;			;
	; CHECK: cmpw $66, (%rdi)			; CHECK: cmpw $66, (%rdi)
	; CHECK-NEXT: jne [[CLEANUP]]			; CHECK-NEXT: jne [[CLEANUP]]
	;			;
	; CHECK: movq 8(%rdi), %rdi			; CHECK: movq 8(%rdi), %rdi
	▲ Show 20 Lines • Show All 280 Lines • ▼ Show 20 Lines
	; The stack adjustment can use SUB instr because we do not need to			; The stack adjustment can use SUB instr because we do not need to
	; preserve the EFLAGS at this point.			; preserve the EFLAGS at this point.
	; DISABLE-NEXT: subq $16, %rsp			; DISABLE-NEXT: subq $16, %rsp
	;			;
	; Load the value of b.			; Load the value of b.
	; Create the zero value for the select assignment.			; Create the zero value for the select assignment.
	; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]			; CHECK: xorl [[CMOVE_VAL:%eax]], [[CMOVE_VAL]]
	; CHECK-NEXT: cmpb $0, _b(%rip)			; CHECK-NEXT: cmpb $0, _b(%rip)
	; CHECK-NEXT: jne [[STOREC_LABEL:LBB[0-9_]+]]			; CHECK-NEXT: movl $48, [[IMM_VAL:%ecx]]
	;			; CHECK-NEXT: cmovnel [[CMOVE_VAL]], [[IMM_VAL]]
	; CHECK: movb $48, [[CMOVE_VAL:%al]]			; CHECK-NEXT: movb %cl, _c(%rip)
	;
	; CHECK: [[STOREC_LABEL]]:
	;
	; ENABLE-NEXT: pushq
	; For the stack adjustment, we need to preserve the EFLAGS.
	; ENABLE-NEXT: leaq -16(%rsp), %rsp
	;
	; Technically, we should use CMOVE_VAL here or its subregister.
	; CHECK-NEXT: movb %al, _c(%rip)
	; testb set the EFLAGS read here.
	; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]			; CHECK-NEXT: je [[VARFUNC_CALL:LBB[0-9_]+]]
	;			;
	; The code of the loop is not interesting.			; The code of the loop is not interesting.
	; [...]			; [...]
	;			;
	; CHECK: [[VARFUNC_CALL]]:			; CHECK: [[VARFUNC_CALL]]:
	; Set the null parameter.			; Set the null parameter.
	; CHECK-NEXT: xorl %edi, %edi			; CHECK-NEXT: xorl %edi, %edi
	▲ Show 20 Lines • Show All 205 Lines • Show Last 20 Lines

This is an archive of the discontinued LLVM Phabricator instance.

[X86] Promote i8 CMOV's (PR40965) ClosedPublic

Details

Indeed, performance should be the main point here.

What about partial register stalls?

Diff Detail

Event Timeline

Revision Contents

Diff 189741

lib/Target/X86/X86ISelLowering.cpp

test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll

test/CodeGen/X86/cmov-promotion.ll

test/CodeGen/X86/cmov.ll

test/CodeGen/X86/cmovcmov.ll

test/CodeGen/X86/copy-eflags.ll

test/CodeGen/X86/fast-isel-select-pseudo-cmov.ll

test/CodeGen/X86/fshl.ll

test/CodeGen/X86/fshr.ll

test/CodeGen/X86/i386-shrink-wrapping.ll

test/CodeGen/X86/midpoint-int.ll

test/CodeGen/X86/pr5145.ll

test/CodeGen/X86/sadd_sat.ll

test/CodeGen/X86/sadd_sat_vec.ll

test/CodeGen/X86/sat-add.ll

test/CodeGen/X86/select.ll

test/CodeGen/X86/select_const.ll

test/CodeGen/X86/ssub_sat.ll

test/CodeGen/X86/ssub_sat_vec.ll

test/CodeGen/X86/uadd_sat.ll

test/CodeGen/X86/uadd_sat_vec.ll

test/CodeGen/X86/usub_sat.ll

test/CodeGen/X86/usub_sat_vec.ll

test/CodeGen/X86/x86-shrink-wrapping.ll

[X86] Promote i8 CMOV's (PR40965)
ClosedPublic