This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/AArch64/
-
Target/
-
AArch64/
-
AArch64SchedA55.td
-
test/
-
CodeGen/AArch64/
-
AArch64/
-
GlobalISel/
-
arm64-atomic.ll
-
call-translator-variadic-musttail.ll
-
combine-udiv.ll
-
select-bitfield-insert.ll
-
store-merging.ll
-
swifterror.ll
-
aarch64-be-bv.ll
-
aarch64-dup-ext.ll
-
aarch64-load-ext.ll
-
aarch64-matrix-umull-smull.ll
-
aarch64-mops-consecutive.ll
-
aarch64-smull.ll
-
aarch64-tail-dup-size.ll
-
aarch64_win64cc_vararg.ll
-
active_lane_mask.ll
-
align-down.ll
-
and-mask-removal.ll
-
andorbrcompare.ll
-
arm64-AdvSIMD-Scalar.ll
-
arm64-addr-type-promotion.ll
-
arm64-addrmode.ll
-
arm64-bitfield-extract.ll
-
arm64-cse.ll
-
arm64-fcopysign.ll
-
arm64-fmadd.ll
-
arm64-homogeneous-prolog-epilog-no-helper.ll
-
arm64-indexed-vector-ldst.ll
-
arm64-inline-asm.ll
-
arm64-instruction-mix-remarks.ll
-
arm64-ldp.ll
-
arm64-neon-copy.ll
-
arm64-neon-mul-div.ll
-
arm64-neon-vector-shuffle-extract.ll
-
arm64-nvcast.ll
-
arm64-rev.ll
-
arm64-setcc-int-to-fp-combine.ll
-
arm64-shrink-wrapping.ll
-
arm64-subvector-extend.ll
-
arm64-tls-dynamics.ll
-
arm64-vabs.ll
-
arm64-vhadd.ll
-
arm64-vmul.ll
-
arm64-xaluo.ll
-
arm64_32-addrs.ll
-
arm64_32.ll
-
atomic-ops-lse.ll
-
bfis-in-loop.ll
-
bitfield-insert.ll
-
branch-relax-bcc.ll
-
build-one-lane.ll
-
build-vector-extract.ll
-
cgp-usubo.ll
-
cmp-select-sign.ll
-
combine-mul.ll
-
consthoist-gep.ll
-
copyprop.ll
-
ctpop-nonean.ll
-
dag-numsignbits.ll
-
div-rem-pair-recomposition-signed.ll
-
div-rem-pair-recomposition-unsigned.ll
-
expand-select.ll
-
extract-bits.ll
-
extract-lowbits.ll
-
faddp.ll
-
fast-isel-addressing-modes.ll
-
fast-isel-gep.ll
-
fast-isel-memcpy.ll
-
fast-isel-shift.ll
-
fcvt_combine.ll
-
fold-csel-cttz-and.ll
-
fold-global-offsets.ll
-
fp-intrinsics-vector.ll
-
fp16-v8-instructions.ll
-
fpclamptosat_vec.ll
-
fptosi-sat-scalar.ll
-
fptosi-sat-vector.ll
-
fptoui-sat-scalar.ll
-
fptoui-sat-vector.ll
-
funnel-shift-rot.ll
-
funnel-shift.ll
-
global-merge-3.ll
-
global-merge-group-by-use.ll
-
global-merge-ignore-single-use-minsize.ll
-
half.ll
-
hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
-
i128-math.ll
-
i128_volatile_load_store.ll
-
i256-math.ll
-
icmp-shift-opt.ll
-
insert-extend.ll
-
insert-subvector-res-legalization.ll
-
insert-subvector.ll
-
isinf.ll
-
known-never-nan.ll
-
llvm-ir-to-intrinsic.ll
-
logic-reassociate.ll
-
logic-shift.ll
-
logical_shifted_reg.ll
-
lowerMUL-newload.ll
-
machine-combiner-copy.ll
-
machine-combiner-subadd.ll
-
machine-licm-sink-instr.ll
-
memcpy-scoped-aa.ll
-
merge-trunc-store.ll
-
midpoint-int.ll
-
minmax-of-minmax.ll
-
minmax.ll
-
misched-fusion-lit.ll
-
mul_pow2.ll
-
named-vector-shuffles-neon.ll
-
named-vector-shuffles-sve.ll
-
neg-imm.ll
-
neon-abd.ll
-
neon-bitwise-instructions.ll
-
neon-dotreduce.ll
-
neon-extadd.ll
-
neon-extracttruncate.ll
-
neon-mla-mls.ll
-
neon-reverseshuffle.ll
-
neon-stepvector.ll
-
neon-truncstore.ll
-
neon-wide-splat.ll
-
neon-widen-shuffle.ll
-
nontemporal.ll
-
nzcv-save.ll
-
overeager_mla_fusing.ll
-
peephole-and-tst.ll
-
ragreedy-local-interval-cost.ll
-
rand.ll
-
reduce-and.ll
-
reduce-or.ll
-
reduce-shuffle.ll
-
reduce-xor.ll
-
regress-tblgen-chains.ll
-
rotate-extract.ll
-
sadd_sat_plus.ll
-
sadd_sat_vec.ll
-
sat-add.ll
-
select-constant-xor.ll
-
select-with-and-or.ll
-
select_const.ll
-
select_fmf.ll
-
selectcc-to-shiftand.ll
-
settag-merge-order.ll
-
settag-merge.ll
-
settag.ll
-
shift-amount-mod.ll
-
shift-by-signext.ll
-
shift_minsize.ll
-
shuffle-tbl34.ll
-
shuffles.ll
-
sink-addsub-of-const.ll
-
sinksplat.ll
-
sitofp-fixed-legal.ll
-
sme-intrinsics-loads.ll
-
sme-intrinsics-mova-extract.ll
-
sme-intrinsics-mova-insert.ll
-
sme-intrinsics-stores.ll
-
speculation-hardening-loads.ll
-
split-vector-insert.ll
-
sqrt-fastmath.ll
-
srem-seteq-illegal-types.ll
-
srem-seteq-vec-nonsplat.ll
-
srem-seteq-vec-splat.ll
-
srem-vector-lkk.ll
-
sshl_sat.ll
-
ssub_sat_plus.ll
-
ssub_sat_vec.ll
-
stack-guard-sysreg.ll
-
statepoint-call-lowering.ll
-
sve-abd.ll
-
sve-calling-convention-mixed.ll
-
sve-extract-element.ll
-
sve-extract-fixed-vector.ll
-
sve-extract-scalable-vector.ll
-
sve-fcopysign.ll
-
sve-fix-length-and-combine-512.ll
-
sve-fixed-ld2-alloca.ll
-
sve-fixed-length-bit-counting.ll
-
sve-fixed-length-extract-subvector.ll
-
sve-fixed-length-fp-reduce.ll
-
sve-fixed-length-fp-select.ll
-
sve-fixed-length-fp-to-int.ll
-
sve-fixed-length-frame-offests-crash.ll
-
sve-fixed-length-insert-vector-elt.ll
-
sve-fixed-length-int-arith.ll
-
sve-fixed-length-int-compares.ll
-
sve-fixed-length-int-div.ll
-
sve-fixed-length-int-log.ll
-
sve-fixed-length-int-minmax.ll
-
sve-fixed-length-int-mulh.ll
-
sve-fixed-length-int-reduce.ll
-
sve-fixed-length-int-rem.ll
-
sve-fixed-length-int-select.ll
-
sve-fixed-length-int-shifts.ll
-
sve-fixed-length-int-to-fp.ll
-
sve-fixed-length-int-vselect.ll
-
sve-fixed-length-loads.ll
-
sve-fixed-length-log-reduce.ll
-
sve-fixed-length-masked-gather.ll
-
sve-fixed-length-masked-loads.ll
-
sve-fixed-length-masked-scatter.ll
-
sve-fixed-length-masked-stores.ll
-
sve-fixed-length-permute-rev.ll
-
sve-fixed-length-rev.ll
-
sve-fixed-length-sdiv-pow2.ll
-
sve-fixed-length-shuffles.ll
-
sve-fixed-length-splat-vector.ll
-
sve-fixed-length-stores.ll
-
sve-fixed-length-vector-shuffle.ll
-
sve-fp-reduce.ll
-
sve-gather-scatter-addr-opts.ll
-
sve-implicit-zero-filling.ll
-
sve-insert-element.ll
-
sve-insert-vector.ll
-
sve-int-arith.ll
-
sve-intrinsics-counting-elems-i32.ll
-
sve-intrinsics-index.ll
-
sve-intrinsics-int-arith.ll
-
sve-intrinsics-perm-select.ll
-
sve-ld-post-inc.ll
-
sve-ld1r.ll
-
sve-masked-ldst-sext.ll
-
sve-select.ll
-
sve-split-extract-elt.ll
-
sve-split-fcvt.ll
-
sve-split-fp-reduce.ll
-
sve-split-insert-elt.ll
-
sve-split-int-pred-reduce.ll
-
sve-split-int-reduce.ll
-
sve-split-load.ll
-
sve-srem-combine-loop.ll
-
sve-stepvector.ll
-
sve-trunc.ll
-
sve-umulo-sdnode.ll
-
sve-vecreduce-fold.ll
-
sve-vector-splat.ll
-
sve2-fcopysign.ll
-
swifterror.ll
-
tbl-loops.ll
-
typepromotion-overflow.ll
-
typepromotion-phisret.ll
-
typepromotion-signed.ll
-
uadd_sat_plus.ll
-
uadd_sat_vec.ll
-
udivmodei5.ll
-
umulo-128-legalisation-lowering.ll
-
urem-lkk.ll
-
urem-seteq-illegal-types.ll
-
urem-seteq-vec-nonsplat.ll
-
urem-seteq-vec-nonzero.ll
-
urem-seteq-vec-splat.ll
-
urem-seteq-vec-tautological.ll
-
urem-vector-lkk.ll
-
usub_sat_vec.ll
-
vec_uaddo.ll
-
vec_umulo.ll
-
vecreduce-add.ll
-
vecreduce-fadd-legalization-strict.ll
-
vecreduce-fadd-legalization.ll
-
vecreduce-fadd.ll
-
vecreduce-fmax-legalization.ll
-
vecreduce-fmin-legalization.ll
-
vector-fcopysign.ll
-
vector-gep.ll
-
vector-popcnt-128-ult-ugt.ll
-
vselect-constants.ll
-
vselect-ext.ll
-
win64_vararg.ll
-
win64_vararg_float.ll
-
win64_vararg_float_cc.ll
-
zero-call-used-regs.ll
-
MC/AArch64/
-
AArch64/
-
elf-globaladdress.ll
-
Transforms/
-
CanonicalizeFreezeInLoops/
-
aarch64.ll
-
CodeGenPrepare/AArch64/
-
AArch64/
-
large-offset-gep.ll
-
LoopStrengthReduce/AArch64/
-
AArch64/
-
lsr-ldp.ll
-
tools/
-
UpdateTestChecks/update_llc_test_checks/Inputs/
-
update_llc_test_checks/
-
Inputs/
-
aarch64_generated_funcs.ll
-
aarch64_generated_funcs.ll.generated.expected
-
aarch64_generated_funcs.ll.nogenerated.expected
-
llvm-mca/AArch64/Cortex/
-
AArch64/
-
Cortex/
-
A55-add-sequence.s
-
A55-all-stats.s
-
A55-all-views.s
-
A55-basic-instructions.s
-
A55-in-order-retire.s
-
A55-load-readadv.s
-
A55-out-of-order-retire.s
-
A55-store-readadv.s
-
IPC/
-
A55-0-single-add.s
-
A55-1-add-seq.s
-
A55-3-mul.s
-
A55-4-sdiv.s
-
A55-5-mul-sdiv.s
-
A55-6-mul.s
-
A55-7-cmp.s

Differential D129449

[AArch64] Update latencies for Cortex-A55 schedule.
Accepted · Public

Authored by dmgreen on Jul 10 2022, 10:03 AM.

Download Raw Diff

Details

Reviewers

SjoerdMeijer
samtebbs
kristof.beyls
t.p.northover
NickGuy
andreadb
sjarus

Summary

The Cortex-A55 schedule currently attempts to model a lot of the effective latencies by marking most integer instructions as having a latency of 3, and then adding forwarding latencies between classes of instructions. When this works it does OK, but it is very easy to either get the effective latencies wrong or be tripped up by instructions, such as pseudo instructions, that knock the latency back to 3 without considering forwarding. That in turn can make the decisions it makes suboptimal. This patch simplifies that by just setting the latencies more directly, lining the latencies up with the values from the Software Optimization Guide. In reality the core is more sophisticated than either scheme.

As expected for the AArch64 default schedule, this alters quite a lot of codegen. Almost all of the tests are the same instructions in a slightly different order. The ones with interesting differences are:

andorbrcompare.ll - Now chooses ccmp vs branch, due to some bad use of latencies in the AArch64ConditionalCompares pass.
arm64-ldp.ll - Now uses more postinc. Yay.
arm64-neon-mul-div.ll - Has slightly more spills. Boo.
Some other changes like these, where there are slightly fewer or more instructions.
neon-mla-mls.ll - Chooses mul;sub as opposed to neg;mla. I believe this is generally better, and the differences are coming from better consideration of COPYs.
llvm-mca tests no longer show instructions taking three cycles.

For all the measurements I've been collecting, the performance is on average between flat and a slight performance increase, depending on the core and the benchmarks being run. The knock-on effects from different instruction ordering can make any individual test better or worse, but from a range of benchmarks they tend to roughly average one another out.

Diff Detail

Event Timeline

dmgreen created this revision. Jul 10 2022, 10:03 AM

Herald added a reviewer: andreadb. · View Herald TranscriptJul 10 2022, 10:03 AM

Herald added a reviewer: sjarus. · View Herald Transcript

Herald added a project: Restricted Project. · View Herald Transcript

Herald added subscribers: armkevincheng, eric-k256, asbirlea and 4 others. · View Herald Transcript

dmgreen requested review of this revision. Jul 10 2022, 10:03 AM

Herald added a project: Restricted Project. · View Herald TranscriptJul 10 2022, 10:03 AM

Harbormaster completed remote builds in B174572: Diff 443510.Jul 10 2022, 10:03 AM

I think you just love updating a lot of tests... ;-)

More seriously, the change makes sense because, like you said, the latencies line up better with the SWOG.
But since perf is flat or a slight performance increase, I was just curious about your motivation for doing this. I.e., is the slight perf increase worth it, or is this an enabler for other things?

This revision is now accepted and ready to land. Jul 11 2022, 1:00 AM

Thanks. The latest issue we ran into was from the machine combiner costmodel in D125588, but we've been noticing the increased latencies causing issues here and there for a while now. I'm hoping with more normal values it can generally make better decisions.

But I may wait until after the branch point to commit this, as it does change a lot of codegen and we are not very far away now.

Matt added a subscriber: Matt. Jul 13 2022, 4:15 PM

Cheers, sounds good.

Large Diff

This large diff affects 295 files. Files without inline comments have been collapsed. Expand All Files

Revision Contents

Path

Size

llvm/

lib/

Target/

AArch64/

AArch64SchedA55.td

68 lines

test/

CodeGen/

AArch64/

GlobalISel/

arm64-atomic.ll

128 lines

call-translator-variadic-musttail.ll

10 lines

combine-udiv.ll

66 lines

select-bitfield-insert.ll

2 lines

46 lines

52 lines

40 lines

18 lines

16 lines

aarch64-matrix-umull-smull.ll

30 lines

aarch64-mops-consecutive.ll

20 lines

aarch64-smull.ll

68 lines

aarch64-tail-dup-size.ll

6 lines

aarch64_win64cc_vararg.ll

2 lines

188 lines

2 lines

2 lines

112 lines

arm64-AdvSIMD-Scalar.ll

24 lines

arm64-addr-type-promotion.ll

26 lines

arm64-addrmode.ll

6 lines

arm64-bitfield-extract.ll

12 lines

arm64-cse.ll

2 lines

arm64-fcopysign.ll

6 lines

arm64-fmadd.ll

2 lines

arm64-homogeneous-prolog-epilog-no-helper.ll

8 lines

arm64-indexed-vector-ldst.ll

120 lines

arm64-inline-asm.ll

2 lines

arm64-instruction-mix-remarks.ll

4 lines

arm64-ldp.ll

20 lines

arm64-neon-copy.ll

10 lines

arm64-neon-mul-div.ll

480 lines

arm64-neon-vector-shuffle-extract.ll

4 lines

arm64-nvcast.ll

22 lines

arm64-rev.ll

90 lines

arm64-setcc-int-to-fp-combine.ll

10 lines

arm64-shrink-wrapping.ll

88 lines

arm64-subvector-extend.ll

552 lines

arm64-tls-dynamics.ll

4 lines

34 lines

14 lines

28 lines

294 lines

6 lines

3 lines

16 lines

56 lines

22 lines

2 lines

4 lines

build-vector-extract.ll

110 lines

10 lines

22 lines

2 lines

48 lines

12 lines

26 lines

4 lines

div-rem-pair-recomposition-signed.ll

18 lines

div-rem-pair-recomposition-unsigned.ll

18 lines

28 lines

136 lines

78 lines

4 lines

fast-isel-addressing-modes.ll

34 lines

6 lines

6 lines

24 lines

50 lines

fold-csel-cttz-and.ll

2 lines

fold-global-offsets.ll

10 lines

fp-intrinsics-vector.ll

40 lines

fp16-v8-instructions.ll

808 lines

120 lines

4 lines

1390 lines

12 lines

1219 lines

32 lines

126 lines

4 lines

global-merge-group-by-use.ll

4 lines

global-merge-ignore-single-use-minsize.ll

8 lines

half.ll

6 lines

hoist-and-by-const-from-lshr-in-eqcmp-zero.ll

2 lines

i128-math.ll

40 lines

i128_volatile_load_store.ll

14 lines

i256-math.ll

28 lines

icmp-shift-opt.ll

12 lines

insert-extend.ll

122 lines

insert-subvector-res-legalization.ll

32 lines

insert-subvector.ll

86 lines

isinf.ll

4 lines

known-never-nan.ll

10 lines

llvm-ir-to-intrinsic.ll

133 lines

logic-reassociate.ll

2 lines

logic-shift.ll

38 lines

logical_shifted_reg.ll

68 lines

lowerMUL-newload.ll

20 lines

machine-combiner-copy.ll

2 lines

machine-combiner-subadd.ll

18 lines

machine-licm-sink-instr.ll

16 lines

16 lines

20 lines

252 lines

256 lines

2 lines

misched-fusion-lit.ll

2 lines

mul_pow2.ll

2 lines

named-vector-shuffles-neon.ll

18 lines

named-vector-shuffles-sve.ll

104 lines

neg-imm.ll

3 lines

neon-abd.ll

20 lines

neon-bitwise-instructions.ll

8 lines

neon-dotreduce.ll

16 lines

neon-extadd.ll

188 lines

neon-extracttruncate.ll

20 lines

neon-mla-mls.ll

30 lines

neon-reverseshuffle.ll

4 lines

neon-stepvector.ll

8 lines

neon-truncstore.ll

6 lines

neon-wide-splat.ll

2 lines

neon-widen-shuffle.ll

6 lines

nontemporal.ll

44 lines

nzcv-save.ll

8 lines

overeager_mla_fusing.ll

10 lines

peephole-and-tst.ll

30 lines

ragreedy-local-interval-cost.ll

116 lines

4 lines

32 lines

32 lines

535 lines

32 lines

regress-tblgen-chains.ll

2 lines

14 lines

38 lines

74 lines

6 lines

select-constant-xor.ll

2 lines

select-with-and-or.ll

32 lines

select_const.ll

48 lines

select_fmf.ll

12 lines

selectcc-to-shiftand.ll

4 lines

settag-merge-order.ll

4 lines

6 lines

12 lines

264 lines

12 lines

60 lines

121 lines

30 lines

sink-addsub-of-const.ll

24 lines

sinksplat.ll

35 lines

sitofp-fixed-legal.ll

12 lines

sme-intrinsics-loads.ll

30 lines

sme-intrinsics-mova-extract.ll

90 lines

sme-intrinsics-mova-insert.ll

2 lines

sme-intrinsics-stores.ll

16 lines

speculation-hardening-loads.ll

2 lines

split-vector-insert.ll

14 lines

sqrt-fastmath.ll

38 lines

srem-seteq-illegal-types.ll

74 lines

srem-seteq-vec-nonsplat.ll

176 lines

srem-seteq-vec-splat.ll

30 lines

360 lines

2 lines

38 lines

74 lines

stack-guard-sysreg.ll

4 lines

statepoint-call-lowering.ll

4 lines

sve-abd.ll

24 lines

sve-calling-convention-mixed.ll

18 lines

sve-extract-element.ll

4 lines

sve-extract-fixed-vector.ll

36 lines

sve-extract-scalable-vector.ll

14 lines

sve-fcopysign.ll

2 lines

sve-fix-length-and-combine-512.ll

2 lines

sve-fixed-ld2-alloca.ll

4 lines

sve-fixed-length-bit-counting.ll

6 lines

sve-fixed-length-extract-subvector.ll

6 lines

sve-fixed-length-fp-reduce.ll

6 lines

sve-fixed-length-fp-select.ll

96 lines

sve-fixed-length-fp-to-int.ll

4 lines

sve-fixed-length-frame-offests-crash.ll

79 lines

sve-fixed-length-insert-vector-elt.ll

72 lines

sve-fixed-length-int-arith.ll

52 lines

sve-fixed-length-int-compares.ll

2 lines

sve-fixed-length-int-div.ll

24 lines

sve-fixed-length-int-log.ll

6 lines

sve-fixed-length-int-minmax.ll

8 lines

sve-fixed-length-int-mulh.ll

264 lines

sve-fixed-length-int-reduce.ll

10 lines

sve-fixed-length-int-rem.ll

148 lines

sve-fixed-length-int-select.ll

128 lines

sve-fixed-length-int-shifts.ll

6 lines

sve-fixed-length-int-to-fp.ll

4 lines

sve-fixed-length-int-vselect.ll

2 lines

sve-fixed-length-loads.ll

62 lines

sve-fixed-length-log-reduce.ll

6 lines

sve-fixed-length-masked-gather.ll

42 lines

sve-fixed-length-masked-loads.ll

32 lines

sve-fixed-length-masked-scatter.ll

154 lines

sve-fixed-length-masked-stores.ll

38 lines

sve-fixed-length-permute-rev.ll

132 lines

sve-fixed-length-rev.ll

2 lines

sve-fixed-length-sdiv-pow2.ll

2 lines

sve-fixed-length-shuffles.ll

54 lines

sve-fixed-length-splat-vector.ll

58 lines

sve-fixed-length-stores.ll

42 lines

sve-fixed-length-vector-shuffle.ll

2 lines

sve-fp-reduce.ll

10 lines

sve-gather-scatter-addr-opts.ll

70 lines

sve-implicit-zero-filling.ll

12 lines

sve-insert-element.ll

106 lines

sve-insert-vector.ll

14 lines

sve-int-arith.ll

2 lines

sve-intrinsics-counting-elems-i32.ll

30 lines

sve-intrinsics-index.ll

10 lines

sve-intrinsics-int-arith.ll

2 lines

sve-intrinsics-perm-select.ll

16 lines

sve-ld-post-inc.ll

18 lines

sve-ld1r.ll

40 lines

sve-masked-ldst-sext.ll

6 lines

sve-select.ll

4 lines

sve-split-extract-elt.ll

80 lines

sve-split-fcvt.ll

60 lines

sve-split-fp-reduce.ll

2 lines

sve-split-insert-elt.ll

28 lines

sve-split-int-pred-reduce.ll

33 lines

sve-split-int-reduce.ll

10 lines

sve-split-load.ll

12 lines

sve-srem-combine-loop.ll

2 lines

sve-stepvector.ll

10 lines

sve-trunc.ll

4 lines

sve-umulo-sdnode.ll

36 lines

sve-vecreduce-fold.ll

12 lines

2 lines

12 lines

52 lines

88 lines

typepromotion-overflow.ll

20 lines

typepromotion-phisret.ll

12 lines

typepromotion-signed.ll

20 lines

uadd_sat_plus.ll

38 lines

uadd_sat_vec.ll

80 lines

udivmodei5.ll

68 lines

umulo-128-legalisation-lowering.ll

12 lines

urem-lkk.ll

12 lines

urem-seteq-illegal-types.ll

6 lines

urem-seteq-vec-nonsplat.ll

144 lines

urem-seteq-vec-nonzero.ll

14 lines

urem-seteq-vec-splat.ll

10 lines

urem-seteq-vec-tautological.ll

18 lines

248 lines

48 lines

42 lines

181 lines

36 lines

vecreduce-fadd-legalization-strict.ll

42 lines

vecreduce-fadd-legalization.ll

6 lines

vecreduce-fadd.ll

8 lines

vecreduce-fmax-legalization.ll

4 lines

vecreduce-fmin-legalization.ll

4 lines

vector-fcopysign.ll

44 lines

vector-gep.ll

2 lines

vector-popcnt-128-ult-ugt.ll

24 lines

vselect-constants.ll

42 lines

vselect-ext.ll

152 lines

win64_vararg.ll

16 lines

win64_vararg_float.ll

20 lines

win64_vararg_float_cc.ll

20 lines

zero-call-used-regs.ll

24 lines

MC/

AArch64/

elf-globaladdress.ll

6 lines

Transforms/

CanonicalizeFreezeInLoops/

aarch64.ll

4 lines

CodeGenPrepare/

AArch64/

large-offset-gep.ll

30 lines

LoopStrengthReduce/

AArch64/

lsr-ldp.ll

6 lines

tools/

UpdateTestChecks/

update_llc_test_checks/

Inputs/

aarch64_generated_funcs.ll

6 lines

aarch64_generated_funcs.ll.generated.expected

44 lines

aarch64_generated_funcs.ll.nogenerated.expected

35 lines

llvm-mca/

AArch64/

Cortex/

A55-add-sequence.s

32 lines

A55-all-stats.s

10 lines

A55-all-views.s

22 lines

A55-basic-instructions.s

1108 lines

A55-in-order-retire.s

58 lines

A55-load-readadv.s

490 lines

A55-out-of-order-retire.s

32 lines

A55-store-readadv.s

376 lines

IPC/

2 lines

6 lines

2 lines

6 lines

6 lines

2 lines

2 lines

This is an archive of the discontinued LLVM Phabricator instance.

[AArch64] Update latencies for Cortex-A55 schedule. Accepted · Public

Details

Diff Detail

Event Timeline

Large Diff

Revision Contents

Diff 443510

llvm/lib/Target/AArch64/AArch64SchedA55.td

llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll

llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll

llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll

llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll

llvm/test/CodeGen/AArch64/aarch64-be-bv.ll

llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll

llvm/test/CodeGen/AArch64/aarch64-load-ext.ll

llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll

llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll

llvm/test/CodeGen/AArch64/aarch64-smull.ll

llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll

llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll

llvm/test/CodeGen/AArch64/active_lane_mask.ll

llvm/test/CodeGen/AArch64/align-down.ll

llvm/test/CodeGen/AArch64/and-mask-removal.ll

llvm/test/CodeGen/AArch64/andorbrcompare.ll

llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll

llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll

llvm/test/CodeGen/AArch64/arm64-addrmode.ll

llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll

llvm/test/CodeGen/AArch64/arm64-cse.ll

llvm/test/CodeGen/AArch64/arm64-fcopysign.ll

llvm/test/CodeGen/AArch64/arm64-fmadd.ll

llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll

llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll

llvm/test/CodeGen/AArch64/arm64-inline-asm.ll

llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll

llvm/test/CodeGen/AArch64/arm64-ldp.ll

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll

llvm/test/CodeGen/AArch64/arm64-neon-vector-shuffle-extract.ll

llvm/test/CodeGen/AArch64/arm64-nvcast.ll

llvm/test/CodeGen/AArch64/arm64-rev.ll

llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll

llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll

llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll

llvm/test/CodeGen/AArch64/arm64-tls-dynamics.ll

llvm/test/CodeGen/AArch64/arm64-vabs.ll

llvm/test/CodeGen/AArch64/arm64-vhadd.ll

llvm/test/CodeGen/AArch64/arm64-vmul.ll

llvm/test/CodeGen/AArch64/arm64-xaluo.ll

llvm/test/CodeGen/AArch64/arm64_32-addrs.ll

llvm/test/CodeGen/AArch64/arm64_32.ll

llvm/test/CodeGen/AArch64/atomic-ops-lse.ll

llvm/test/CodeGen/AArch64/bfis-in-loop.ll

llvm/test/CodeGen/AArch64/bitfield-insert.ll

llvm/test/CodeGen/AArch64/branch-relax-bcc.ll

llvm/test/CodeGen/AArch64/build-one-lane.ll

llvm/test/CodeGen/AArch64/build-vector-extract.ll

llvm/test/CodeGen/AArch64/cgp-usubo.ll

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

llvm/test/CodeGen/AArch64/combine-mul.ll

llvm/test/CodeGen/AArch64/consthoist-gep.ll

llvm/test/CodeGen/AArch64/copyprop.ll

llvm/test/CodeGen/AArch64/ctpop-nonean.ll

llvm/test/CodeGen/AArch64/dag-numsignbits.ll

llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll

llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll

llvm/test/CodeGen/AArch64/expand-select.ll

llvm/test/CodeGen/AArch64/extract-bits.ll

llvm/test/CodeGen/AArch64/extract-lowbits.ll

llvm/test/CodeGen/AArch64/faddp.ll

llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll

llvm/test/CodeGen/AArch64/fast-isel-gep.ll

llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll

llvm/test/CodeGen/AArch64/fast-isel-shift.ll

llvm/test/CodeGen/AArch64/fcvt_combine.ll

llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll

llvm/test/CodeGen/AArch64/fold-global-offsets.ll

[AArch64] Update latencies for Cortex-A55 schedule.
AcceptedPublic