This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/test/CodeGen/X86/
-
test/
-
CodeGen/
-
X86/
-
2007-01-13-StackPtrIndex.ll
-
2007-08-09-IllegalX86-64Asm.ll
-
2009-02-26-MachineLICMBug.ll
-
2009-03-23-MultiUseSched.ll
-
2009-05-30-ISelBug.ll
-
AMX/
-
amx-across-func.ll
-
amx-greedy-ra-spill-shape.ll
-
amx-intrinsic-chain.ll
-
amx-ldtilecfg-insert.ll
-
amx-lower-tile-copy.ll
-
amx-spill-merge.ll
-
amx-spill.ll
-
MergeConsecutiveStores.ll
-
StackColoring.ll
-
add-and-not.ll
-
addcarry.ll
-
avg.ll
-
avoid-sfb.ll
-
avx-intrinsics-fast-isel.ll
-
avx-load-store.ll
-
avx512-calling-conv.ll
-
avx512-regcall-NoMask.ll
-
avx512bwvl-intrinsics-upgrade.ll
-
bfloat.ll
-
bitcast-and-setcc-512.ll
-
bitreverse.ll
-
break-false-dep.ll
-
bswap.ll
-
callbr-asm-blockplacement.ll
-
callbr-asm-branch-folding.ll
-
callbr-asm-phi-placement.ll
-
cgp-usubo.ll
-
clear_upper_vector_element_bits.ll
-
combine-pmuldq.ll
-
combine-sdiv.ll
-
commute-fcmp.ll
-
compact-unwind.ll
-
conditional-tailcall.ll
-
copy-eflags.ll
-
ctpop-combine.ll
-
dag-update-nodetomatch.ll
-
dagcombine-cse.ll
-
div-rem-pair-recomposition-signed.ll
-
div-rem-pair-recomposition-unsigned.ll
-
divmod128.ll
-
extract-bits.ll
-
flt-rounds.ll
-
fma-commute-loop.ll
-
fmaddsub-combine.ll
-
fmaxnum.ll
-
fminnum.ll
-
fp-stack-2results.ll
-
fp128-libcalls-strict.ll
-
fp128-select.ll
-
fpclamptosat_vec.ll
-
fptosi-sat-vector-128.ll
-
fptoui-sat-vector-128.ll
-
gather-addresses.ll
-
h-registers-1.ll
-
haddsub-2.ll
-
haddsub-4.ll
-
hoist-invariant-load.ll
-
i128-mul.ll
-
load-local-v3i1.ll
-
lrshrink.ll
-
lsr-loop-exit-cond.ll
-
lzcnt-zext-cmp.ll
-
machine-combiner-int-vec.ll
-
machine-cp.ll
-
madd.ll
-
masked-iv-unsafe.ll
-
masked_compressstore.ll
-
masked_expandload.ll
-
masked_gather.ll
-
masked_load.ll
-
masked_store_trunc_ssat.ll
-
masked_store_trunc_usat.ll
-
midpoint-int-vec-256.ll
-
misched-matmul.ll
-
mmx-arith.ll
-
mul-constant-result.ll
-
mul-i1024.ll
-
mul-i256.ll
-
mul-i512.ll
-
muloti.ll
-
musttail-varargs.ll
-
nontemporal-loads.ll
-
oddshuffles.ll
-
or-address.ll
-
paddus.ll
-
pmul.ll
-
pmulh.ll
-
popcnt.ll
-
pr18344.ll
-
pr21792.ll
-
pr23603.ll
-
pr29112.ll
-
pr32329.ll
-
pr35316.ll
-
pr38185.ll
-
pr38217.ll
-
pr43820.ll
-
pr45563-2.ll
-
pr45563.ll
-
pr45995.ll
-
pr46877.ll
-
pr47299.ll
-
pr47857.ll
-
pr53990-incorrect-machine-sink.ll
-
promote-cmp.ll
-
psubus.ll
-
ragreedy-hoist-spill.ll
-
reverse_branches.ll
-
sad.ll
-
sadd_sat_vec.ll
-
sbb-false-dep.ll
-
scalar_widen_div.ll
-
scheduler-backtracking.ll
-
sdiv_fix.ll
-
sdiv_fix_sat.ll
-
setcc-wide-types.ll
-
shift-i128.ll
-
shrink_vmul.ll
-
smul-with-overflow.ll
-
smulo-128-legalisation-lowering.ll
-
speculative-load-hardening-call-and-ret.ll
-
speculative-load-hardening.ll
-
srem-seteq-vec-nonsplat.ll
-
sse-intel-ocl.ll
-
sse-regcall.ll
-
sse2-intrinsics-fast-isel.ll
-
sshl_sat.ll
-
ssub_sat_vec.ll
-
statepoint-invoke-ra-enter-at-end.mir
-
statepoint-invoke-ra-inline-spiller.mir
-
statepoint-invoke-ra-remove-back-copies.mir
-
statepoint-live-in-remat.ll
-
statepoint-live-in.ll
-
statepoint-ra-no-ls.ll
-
statepoint-regs.ll
-
statepoint-spill-slot-size-promotion.ll
-
statepoint-stack-usage.ll
-
statepoint-vreg-details.ll
-
statepoint-vreg-invoke.ll
-
statepoint-vreg-unlimited-tied-opnds.ll
-
statepoint-vreg.ll
-
statepoint-vreg.mir
-
subcarry.ll
-
swifterror.ll
-
tail-dup-merge-loop-headers.ll
-
tail-opts.ll
-
tailcallstack64.ll
-
tailccstack64.ll
-
twoaddr-lea.ll
-
uadd_sat_vec.ll
-
udiv_fix_sat.ll
-
umul-with-overflow.ll
-
unfold-masked-merge-vector-variablemask.ll
-
usub_sat_vec.ll
-
var-permute-128.ll
-
var-permute-512.ll
-
vec_int_to_fp.ll
-
vec_saddo.ll
-
vec_smulo.ll
-
vec_ssubo.ll
-
vec_uaddo.ll
-
vec_umulo.ll
-
vec_usubo.ll
-
vector-bitreverse.ll
-
vector-compare-results.ll
-
vector-fshl-128.ll
-
vector-fshl-256.ll
-
vector-fshl-rot-256.ll
-
vector-fshr-128.ll
-
vector-fshr-256.ll
-
vector-fshr-rot-256.ll
-
vector-idiv-sdiv-256.ll
-
vector-idiv-udiv-256.ll
-
vector-interleave.ll
-
vector-interleaved-load-i16-stride-2.ll
-
vector-interleaved-load-i16-stride-3.ll
-
vector-interleaved-load-i16-stride-4.ll
-
vector-interleaved-load-i16-stride-5.ll
-
vector-interleaved-load-i16-stride-6.ll
-
vector-interleaved-load-i32-stride-2.ll
-
vector-interleaved-load-i32-stride-3.ll
-
vector-interleaved-load-i32-stride-4.ll
-
vector-interleaved-load-i32-stride-6.ll
-
vector-interleaved-load-i64-stride-2.ll
-
vector-interleaved-load-i64-stride-3.ll
-
vector-interleaved-load-i64-stride-4.ll
-
vector-interleaved-load-i64-stride-6.ll
-
vector-interleaved-load-i8-stride-3.ll
-
vector-interleaved-load-i8-stride-4.ll
-
vector-interleaved-load-i8-stride-6.ll
-
vector-interleaved-store-i16-stride-2.ll
-
vector-interleaved-store-i16-stride-3.ll
-
vector-interleaved-store-i16-stride-4.ll
-
vector-interleaved-store-i16-stride-5.ll
-
vector-interleaved-store-i16-stride-6.ll
-
vector-interleaved-store-i32-stride-2.ll
-
vector-interleaved-store-i32-stride-3.ll
-
vector-interleaved-store-i32-stride-4.ll
-
vector-interleaved-store-i32-stride-6.ll
-
vector-interleaved-store-i64-stride-2.ll
-
vector-interleaved-store-i64-stride-3.ll
-
vector-interleaved-store-i64-stride-4.ll
-
vector-interleaved-store-i64-stride-6.ll
-
vector-interleaved-store-i8-stride-3.ll
-
vector-interleaved-store-i8-stride-4.ll
-
vector-interleaved-store-i8-stride-6.ll
-
vector-mulfix-legalize.ll
-
vector-reduce-add-sext.ll
-
vector-reduce-fmax.ll
-
vector-reduce-fmin.ll
-
vector-reduce-mul.ll
-
vector-reduce-umax.ll
-
vector-reduce-umin.ll
-
vector-rotate-256.ll
-
vector-shift-by-select-loop.ll
-
vector-shuffle-v192.ll
-
vector-shuffle-variable-128.ll
-
vector-shuffle-variable-256.ll
-
vector-trunc-math.ll
-
vector-trunc-packus.ll
-
vector-trunc-ssat.ll
-
vector-trunc-usat.ll
-
vector-zext.ll
-
vp2intersect_multiple_pairs.ll
-
vselect-minmax.ll
-
vselect-packss.ll
-
x86-cmov-converter.ll
-
x86-interleaved-access.ll
-
znver3-gather.ll

Differential D133902

X86: Stop assigning register costs for longer encodings
ClosedPublic

Authored by MatzeB on Sep 14 2022, 3:32 PM.

Download Raw Diff

Details

Reviewers

RKSimon
craig.topper
qcolombet
mtrofin
arsenm
reames
wmi
sjarus
lebedev.ri

Commits

rG189900eb149b: X86: Stop assigning register costs for longer encodings.

Summary

This stops reporting CostPerUse 1 for R8-R15 and XMM8-XMM31. This was
previously done because instruction encodings require a REX prefix when
using them, resulting in longer instruction encodings. I found that this
regresses the quality of the register allocation as the costs impose an
ordering on eviction candidates. I also feel that there is a bit of an
impedance mismatch as the actual costs occur when encoding instructions
using those registers, but the order of VReg assignments is not
primarily ordered by number of Defs+Uses.

I did extensive measurements with the llvm-test-suite with SPEC2006 +
SPEC2017 included; internal services showed similar patterns. Generally
there are a lot of improvements but also a lot of regressions. But on
average the allocation quality seems to improve at a small code size
regression.

Results for measuring static and dynamic instruction counts:

-O3 + ThinLTO + Instr-PGO

Dynamic Counts (scaled by execution frequency) / Optimization Remarks:

Spills+FoldedSpills   -5.6%
Reloads+FoldedReloads -4.2%
Copies                -0.1%

Static / LLVM Statistics:

regalloc.NumSpills    mean -1.6%, geomean -2.8%
regalloc.NumReloads   mean -1.7%, geomean -3.1%
size..text            mean +0.4%, geomean +0.4%

-O3

Static / LLVM Statistics:

regalloc.NumSpills    mean -2.2%, geomean -3.1%
regalloc.NumReloads   mean -2.6%, geomean -3.9%
size..text            mean +0.6%, geomean +0.6%

-Os

Static / LLVM Statistics:

regalloc.NumSpills   mean -3.0%
regalloc.NumReloads  mean -3.3%
size..text           mean +0.3%, geomean +0.3%

Detailed numbers in https://reviews.llvm.org/P8290

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

MatzeB created this revision.Sep 14 2022, 3:32 PM

Herald added a reviewer: sjarus. · View Herald TranscriptSep 14 2022, 3:32 PM

Herald added a reviewer: lebedev.ri. · View Herald Transcript

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: lebedev.ri, armkevincheng, eric-k256 and 8 others. · View Herald Transcript

MatzeB requested review of this revision.Sep 14 2022, 3:32 PM

Herald added a project: Restricted Project. · View Herald TranscriptSep 14 2022, 3:32 PM

Herald added subscribers: llvm-commits, wdng. · View Herald Transcript

I guess this is somewhat of an RFC, if we would accept better regalloc at slightly bigger programs: I found that dropping CostPerUse produced a small but measurable improvement on X86 regalloc.

I would also love to hear if others can explain why the increased register costs for the larger encodings result in slightly smaller programs, even though I don't see us preferring registers with more or less uses when ordering eviction and assignment choices (and as far as the AllocationOrder goes the registers with longer encodings are still at the back anyway).

MatzeB edited the summary of this revision. (Show Details)Sep 14 2022, 3:42 PM

Looks like the code change to X86RegisterInfo.td is not in this patch?

Harbormaster completed remote builds in B186744: Diff 460246.Sep 14 2022, 4:21 PM

D86836 introduced support for setting multiple CostPerUse values (which we use on RISC-V to set a non-zero value for some registers only when the 'compressed' extension is enabled). Did you evaluate doing something similar to only set CostPerUse at Os or perhaps Oz?

In D133902#3791069, @craig.topper wrote:

Looks like the code change to X86RegisterInfo.td is not in this patch?

huh. It's included now.

D86836 introduced support for setting multiple CostPerUse values (which we use on RISC-V to set a non-zero value for some registers only when the 'compressed' extension is enabled). Did you evaluate doing something similar to only set CostPerUse at Os or perhaps Oz?

Yes I saw that, didn't push on this angle (yet) as we should really use llvm::shouldOptimizeForSize then which requires a ProfileSummaryInfo reference that isn't around in that callback. So would need to refactor those interfaces in some way...

MatzeB updated this revision to Diff 460453.Sep 15 2022, 10:46 AM

Herald added a subscriber: hiraditya. · View Herald TranscriptSep 15 2022, 10:46 AM

Harbormaster completed remote builds in B186900: Diff 460453.Sep 15 2022, 11:23 AM

Early x86-64 CPUs (often with weak frontend/decoders) might benefit from avoiding REX encodings (I know when I was doing exegesis testing for the atom/silvermont sched models avoiding REX registers gave much more consistent backend numbers), although I'm not certain how much work adding CPU Feature specific costs would be (or how much benefit they'd give for most people).

llvm/lib/Target/X86/X86RegisterInfo.td
128 ↗	(On Diff #460453)	Keep the comments?
221 ↗	(On Diff #460453)	This comment is still useful

LGTM

In D133902#3791035, @MatzeB wrote:

I guess this is somewhat of an RFC, if we would accept better regalloc at slightly bigger programs: I found that dropping CostPerUse produced a small but measurable improvement on X86 regalloc.

FWIW, tried it on a server app (was curious if the "slightly larger" bit meant more iCache pressure), doesn't seem to be an issue.

I would also love to hear if others can explain why the increased register costs for the larger encodings result in slightly smaller programs, even though I don't see us preferring registers with more or less uses when ordering eviction and assignment choices (and as far as the AllocationOrder goes the registers with longer encodings are still at the back anyway).

mtrofin accepted this revision.Sep 16 2022, 12:11 PM

This revision is now accepted and ready to land.Sep 16 2022, 12:11 PM

Looks sensible to me.
I would just second @RKSimon's question about how this works with older x86 HW but if x86 folks are fine with the change, that's fine by me.

Early x86-64 CPUs (often with weak frontend/decoders) might benefit from avoiding REX encodings (I know when I was doing exegesis testing for the atom/silvermont sched models avoiding REX registers gave much more consistent backend numbers), although I'm not certain how much work adding CPU Feature specific costs would be (or how much benefit they'd give for most people).

My gut feeling would be that we are better off without the added complexity, as the size differences are somewhat happening by accident anyway. And in fact looking into my data the size differences seem to mostly stem from small functions and there is not really a trend for bigger functions.

llvm/lib/Target/X86/X86RegisterInfo.td
128 ↗	(On Diff #460453)	To me it seemed that this comment was meant to justify the `CostPerUse = [1]` which I am removing, but no problem to keep them anyway.

rebase, update some tests missed the first time.

MatzeB edited the summary of this revision. (Show Details)Sep 30 2022, 1:50 PM

MatzeB edited the summary of this revision. (Show Details)

Harbormaster completed remote builds in B189770: Diff 464388.Sep 30 2022, 3:03 PM

This revision was landed with ongoing or failed builds.Sep 30 2022, 4:03 PM

Closed by commit rG189900eb149b: X86: Stop assigning register costs for longer encodings. (authored by MatzeB). · Explain Why

This revision was automatically updated to reflect the committed changes.

MatzeB added a commit: rG189900eb149b: X86: Stop assigning register costs for longer encodings..

Fixes for expensive-tests enabled bots and attempt at fixing MLRegalloc tests in https://github.com/llvm/llvm-project/commit/f9317bf0bed0e0f248c18114afa24dcd56d727ae (aka https://reviews.llvm.org/rGf9317bf0bed0 )

mtrofin mentioned this in rG25d65b545530: [mlgo] Fix tests post D133902.Sep 30 2022, 5:28 PM

I was just sending the fix for the mlgo bit, thanks for looking into it!

mtrofin mentioned this in rG280ed30b6487: Revert "[mlgo] Fix tests post D133902".Sep 30 2022, 5:30 PM

I'm not sure I have 100% fixed it, we'll see :)

Also thinking about things right now, I realized that my change probably lowers the number of times ragreedy asks for eviction decisions. I hope that doesn't reduce the quality of ML regalloc, maybe you can keep an eye on that on your end.

(I'm happy to revert if it decreases quality, though we have to find a better way than somewhat "arbitrary" CostPerUse / high register numbers triggering things long-term...)

In D133902#3828661, @MatzeB wrote:

I'm not sure I have 100% fixed it, we'll see :)

Also thinking about things right now, I realized that my change probably lowers the number of times ragreedy asks for eviction decisions. I hope that doesn't reduce the quality of ML regalloc, maybe you can keep an eye on that on your end.

(I'm happy to revert if it decreases quality, though we have to find a better way than somewhat "arbitrary" CostPerUse / high register numbers triggering things long-term...)

I tried it with your change already, didn't see anything for the workload I looked at - anyway, we can always retrain it if needed.

Seems we need your changes to dev-mode-logging.ll after all, will push them now.

MatzeB mentioned this in rG56c7cf41d451: Adapt dev-mode-logging.ll test to D133902.Sep 30 2022, 5:45 PM

MatzeB mentioned this in rGbd1ea6e110a3: UPdate reference-log-noml.txt as well to adapt for D133902.Sep 30 2022, 6:04 PM

skan added a subscriber: skan.Feb 1 2023, 4:03 AM

Large Diff

This large diff affects 234 files. Files without inline comments have been collapsed. Expand All Files

Revision Contents

Path

Size

llvm/

test/

CodeGen/

X86/

2007-01-13-StackPtrIndex.ll

52 lines

2007-08-09-IllegalX86-64Asm.ll

79 lines

2009-02-26-MachineLICMBug.ll

12 lines

2009-03-23-MultiUseSched.ll

366 lines

2009-05-30-ISelBug.ll

10 lines

AMX/

amx-across-func.ll

40 lines

amx-greedy-ra-spill-shape.ll

87 lines

amx-intrinsic-chain.ll

18 lines

amx-ldtilecfg-insert.ll

18 lines

amx-lower-tile-copy.ll

22 lines

amx-spill-merge.ll

16 lines

amx-spill.ll

29 lines

MergeConsecutiveStores.ll

116 lines

6 lines

14 lines

66 lines

724 lines

120 lines

avx-intrinsics-fast-isel.ll

12 lines

avx-load-store.ll

12 lines

avx512-calling-conv.ll

516 lines

avx512-regcall-NoMask.ll

98 lines

avx512bwvl-intrinsics-upgrade.ll

48 lines

bfloat.ll

74 lines

bitcast-and-setcc-512.ll

60 lines

bitreverse.ll

274 lines

break-false-dep.ll

221 lines

bswap.ll

12 lines

callbr-asm-blockplacement.ll

22 lines

callbr-asm-branch-folding.ll

36 lines

callbr-asm-phi-placement.ll

6 lines

cgp-usubo.ll

10 lines

clear_upper_vector_element_bits.ll

352 lines

70 lines

184 lines

64 lines

4 lines

conditional-tailcall.ll

68 lines

copy-eflags.ll

24 lines

ctpop-combine.ll

32 lines

dag-update-nodetomatch.ll

178 lines

dagcombine-cse.ll

16 lines

div-rem-pair-recomposition-signed.ll

60 lines

div-rem-pair-recomposition-unsigned.ll

60 lines

144 lines

200 lines

16 lines

24 lines

84 lines

64 lines

64 lines

12 lines

fp128-libcalls-strict.ll

32 lines

fp128-select.ll

20 lines

fpclamptosat_vec.ll

372 lines

fptosi-sat-vector-128.ll

296 lines

fptoui-sat-vector-128.ll

118 lines

34 lines

26 lines

362 lines

13 lines

hoist-invariant-load.ll

6 lines

i128-mul.ll

36 lines

load-local-v3i1.ll

16 lines

lrshrink.ll

22 lines

lsr-loop-exit-cond.ll

217 lines

lzcnt-zext-cmp.ll

10 lines

machine-combiner-int-vec.ll

1016 lines

machine-cp.ll

32 lines

madd.ll

310 lines

masked-iv-unsafe.ll

16 lines

masked_compressstore.ll

10 lines

masked_expandload.ll

10 lines

masked_gather.ll

28 lines

masked_load.ll

96 lines

masked_store_trunc_ssat.ll

1172 lines

masked_store_trunc_usat.ll

876 lines

midpoint-int-vec-256.ll

608 lines

misched-matmul.ll

2 lines

mmx-arith.ll

14 lines

mul-constant-result.ll

142 lines

1843 lines

105 lines

440 lines

74 lines

56 lines

33 lines

238 lines

8 lines

44 lines

120 lines

198 lines

36 lines

6 lines

20 lines

6 lines

55 lines

24 lines

8 lines

20 lines

33 lines

486 lines

24 lines

38 lines

54 lines

162 lines

44 lines

16 lines

pr53990-incorrect-machine-sink.ll

8 lines

promote-cmp.ll

38 lines

psubus.ll

626 lines

ragreedy-hoist-spill.ll

53 lines

22 lines

14 lines

366 lines

54 lines

112 lines

scheduler-backtracking.ll

698 lines

66 lines

188 lines

642 lines

24 lines

56 lines

smul-with-overflow.ll

179 lines

smulo-128-legalisation-lowering.ll

434 lines

speculative-load-hardening-call-and-ret.ll

66 lines

speculative-load-hardening.ll

200 lines

srem-seteq-vec-nonsplat.ll

30 lines

sse-intel-ocl.ll

8 lines

sse-regcall.ll

98 lines

sse2-intrinsics-fast-isel.ll

80 lines

sshl_sat.ll

20 lines

ssub_sat_vec.ll

684 lines

statepoint-invoke-ra-enter-at-end.mir

11 lines

statepoint-invoke-ra-inline-spiller.mir

21 lines

statepoint-invoke-ra-remove-back-copies.mir

20 lines

statepoint-live-in-remat.ll

28 lines

statepoint-live-in.ll

12 lines

statepoint-ra-no-ls.ll

18 lines

statepoint-regs.ll

70 lines

statepoint-spill-slot-size-promotion.ll

16 lines

statepoint-stack-usage.ll

12 lines

statepoint-vreg-details.ll

14 lines

statepoint-vreg-invoke.ll

34 lines

statepoint-vreg-unlimited-tied-opnds.ll

22 lines

36 lines

16 lines

64 lines

62 lines

tail-dup-merge-loop-headers.ll

34 lines

16 lines

2 lines

2 lines

10 lines

28 lines

62 lines

umul-with-overflow.ll

140 lines

unfold-masked-merge-vector-variablemask.ll

1822 lines

36 lines

400 lines

164 lines

76 lines

132 lines

2634 lines

132 lines

108 lines

1530 lines

128 lines

344 lines

vector-compare-results.ll

96 lines

vector-fshl-128.ll

14 lines

vector-fshl-256.ll

156 lines

vector-fshl-rot-256.ll

10 lines

vector-fshr-128.ll

14 lines

vector-fshr-256.ll

214 lines

vector-fshr-rot-256.ll

42 lines

vector-idiv-sdiv-256.ll

28 lines

vector-idiv-udiv-256.ll

36 lines

vector-interleave.ll

72 lines

vector-interleaved-load-i16-stride-2.ll

150 lines

vector-interleaved-load-i16-stride-3.ll

874 lines

vector-interleaved-load-i16-stride-4.ll

1986 lines

vector-interleaved-load-i16-stride-5.ll

3073 lines

vector-interleaved-load-i16-stride-6.ll

4261 lines

vector-interleaved-load-i32-stride-2.ll

94 lines

vector-interleaved-load-i32-stride-3.ll

487 lines

vector-interleaved-load-i32-stride-4.ll

1183 lines

vector-interleaved-load-i32-stride-6.ll

1137 lines

vector-interleaved-load-i64-stride-2.ll

174 lines

vector-interleaved-load-i64-stride-3.ll

166 lines

vector-interleaved-load-i64-stride-4.ll

516 lines

vector-interleaved-load-i64-stride-6.ll

511 lines

vector-interleaved-load-i8-stride-3.ll

602 lines

vector-interleaved-load-i8-stride-4.ll

881 lines

vector-interleaved-load-i8-stride-6.ll

2931 lines

vector-interleaved-store-i16-stride-2.ll

12 lines

vector-interleaved-store-i16-stride-3.ll

925 lines

vector-interleaved-store-i16-stride-4.ll

704 lines

vector-interleaved-store-i16-stride-5.ll

2643 lines

vector-interleaved-store-i16-stride-6.ll

3025 lines

vector-interleaved-store-i32-stride-2.ll

262 lines

vector-interleaved-store-i32-stride-3.ll

658 lines

vector-interleaved-store-i32-stride-4.ll

1182 lines

vector-interleaved-store-i32-stride-6.ll

1620 lines

vector-interleaved-store-i64-stride-2.ll

276 lines

vector-interleaved-store-i64-stride-3.ll

189 lines

vector-interleaved-store-i64-stride-4.ll

561 lines

vector-interleaved-store-i64-stride-6.ll

636 lines

vector-interleaved-store-i8-stride-3.ll

264 lines

vector-interleaved-store-i8-stride-4.ll

90 lines

vector-interleaved-store-i8-stride-6.ll

1734 lines

vector-mulfix-legalize.ll

64 lines

vector-reduce-add-sext.ll

72 lines

vector-reduce-fmax.ll

195 lines

vector-reduce-fmin.ll

195 lines

vector-reduce-mul.ll

16 lines

vector-reduce-umax.ll

36 lines

vector-reduce-umin.ll

16 lines

vector-rotate-256.ll

10 lines

vector-shift-by-select-loop.ll

408 lines

vector-shuffle-v192.ll

128 lines

vector-shuffle-variable-128.ll

460 lines

vector-shuffle-variable-256.ll

10 lines

vector-trunc-math.ll

18 lines

vector-trunc-packus.ll

3594 lines

vector-trunc-ssat.ll

4382 lines

vector-trunc-usat.ll

2318 lines

vector-zext.ll

24 lines

vp2intersect_multiple_pairs.ll

12 lines

vselect-minmax.ll

1018 lines

vselect-packss.ll

8 lines

x86-cmov-converter.ll

234 lines

x86-interleaved-access.ll

848 lines

znver3-gather.ll

66 lines

This is an archive of the discontinued LLVM Phabricator instance.

X86: Stop assigning register costs for longer encodingsClosedPublic

Details

-O3 + ThinLTO + Instr-PGO

-O3

-Os

Diff Detail

Event Timeline

Large Diff

Revision Contents

Diff 460246

llvm/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll

llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll

llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll

llvm/test/CodeGen/X86/2009-03-23-MultiUseSched.ll

llvm/test/CodeGen/X86/2009-05-30-ISelBug.ll

llvm/test/CodeGen/X86/AMX/amx-across-func.ll

llvm/test/CodeGen/X86/AMX/amx-greedy-ra-spill-shape.ll

llvm/test/CodeGen/X86/AMX/amx-intrinsic-chain.ll

llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll

llvm/test/CodeGen/X86/AMX/amx-lower-tile-copy.ll

llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll

llvm/test/CodeGen/X86/AMX/amx-spill.ll

llvm/test/CodeGen/X86/MergeConsecutiveStores.ll

llvm/test/CodeGen/X86/StackColoring.ll

llvm/test/CodeGen/X86/add-and-not.ll

llvm/test/CodeGen/X86/addcarry.ll

llvm/test/CodeGen/X86/avg.ll

llvm/test/CodeGen/X86/avoid-sfb.ll

llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll

llvm/test/CodeGen/X86/avx-load-store.ll

llvm/test/CodeGen/X86/avx512-calling-conv.ll

llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll

llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll

llvm/test/CodeGen/X86/bfloat.ll

llvm/test/CodeGen/X86/bitcast-and-setcc-512.ll

llvm/test/CodeGen/X86/bitreverse.ll

llvm/test/CodeGen/X86/break-false-dep.ll

llvm/test/CodeGen/X86/bswap.ll

llvm/test/CodeGen/X86/callbr-asm-blockplacement.ll

llvm/test/CodeGen/X86/callbr-asm-branch-folding.ll

llvm/test/CodeGen/X86/callbr-asm-phi-placement.ll

llvm/test/CodeGen/X86/cgp-usubo.ll

llvm/test/CodeGen/X86/clear_upper_vector_element_bits.ll

llvm/test/CodeGen/X86/combine-pmuldq.ll

llvm/test/CodeGen/X86/combine-sdiv.ll

llvm/test/CodeGen/X86/commute-fcmp.ll

llvm/test/CodeGen/X86/compact-unwind.ll

llvm/test/CodeGen/X86/conditional-tailcall.ll

llvm/test/CodeGen/X86/copy-eflags.ll

llvm/test/CodeGen/X86/ctpop-combine.ll

llvm/test/CodeGen/X86/dag-update-nodetomatch.ll

llvm/test/CodeGen/X86/dagcombine-cse.ll

llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll

llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll

llvm/test/CodeGen/X86/divmod128.ll

llvm/test/CodeGen/X86/extract-bits.ll

llvm/test/CodeGen/X86/flt-rounds.ll

llvm/test/CodeGen/X86/fma-commute-loop.ll

llvm/test/CodeGen/X86/fmaddsub-combine.ll

llvm/test/CodeGen/X86/fmaxnum.ll

llvm/test/CodeGen/X86/fminnum.ll

llvm/test/CodeGen/X86/fp-stack-2results.ll

llvm/test/CodeGen/X86/fp128-libcalls-strict.ll

llvm/test/CodeGen/X86/fp128-select.ll

llvm/test/CodeGen/X86/fpclamptosat_vec.ll

llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll

llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll

llvm/test/CodeGen/X86/gather-addresses.ll

llvm/test/CodeGen/X86/h-registers-1.ll

llvm/test/CodeGen/X86/haddsub-2.ll

llvm/test/CodeGen/X86/haddsub-4.ll

llvm/test/CodeGen/X86/hoist-invariant-load.ll

llvm/test/CodeGen/X86/i128-mul.ll

llvm/test/CodeGen/X86/load-local-v3i1.ll

llvm/test/CodeGen/X86/lrshrink.ll

llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll

llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll

llvm/test/CodeGen/X86/machine-combiner-int-vec.ll

llvm/test/CodeGen/X86/machine-cp.ll

X86: Stop assigning register costs for longer encodings
ClosedPublic