This is an archive of the discontinued LLVM Phabricator instance.

Paths

Table of Contents

-
llvm/
-
lib/Target/PowerPC/
-
Target/
-
PowerPC/
18/28
PPCScheduleP8.td
-
test/CodeGen/PowerPC/
-
CodeGen/
-
PowerPC/
-
2006-07-07-ComputeMaskedBits.ll
1/1
BreakableToken-reduced.ll
-
CSR-fit.ll
-
CompareEliminationSpillIssue.ll
1
P10-stack-alignment.ll
-
PR35812-neg-cmpxchg.ll
-
VSX-XForm-Scalars.ll
-
aix-dfltabi-rsrvd-reg.ll
-
aix-vsx-splatimm.ll
1
aix32-p8-scalar_vector_conversions.ll
-
aix_scalar_vector_permuted.ll
-
all-atomics.ll
-
and-extend-combine.ll
-
asm-template-I.ll
-
atomics-i128-ldst.ll
1/2
atomics-i128.ll
2/4
atomics-i16-ldst.ll
2/2
atomics-i32-ldst.ll
-
atomics-i64-ldst.ll
2/2
atomics-i8-ldst.ll
1/2
atomics-regression.ll
2/2
bool-math.ll
1
branch_coalesce.ll
-
build-vector-tests.ll
-
builtins-ppc-p8vector.ll
-
canonical-merge-shuffles.ll
-
cfence-float.ll
-
coldcc2.ll
-
combine-fneg.ll
-
combine-sext-and-shl-after-isel.ll
-
combine_ext_trunc.ll
-
const-nonsplat-array-init.ll
-
const-splat-array-init.ll
-
constant-combines.ll
-
crypto_bifs_be.ll
-
csr-split.ll
1/1
ctrloop-constrained-fp.ll
-
ctrloop-fp128.ll
-
cxx_tlscc64.ll
-
disable-ctr-ppcf128.ll
1/1
elf64-byval-cc.ll
-
extra-toc-reg-deps.ll
-
extract-and-store.ll
-
f128-aggregates.ll
-
f128-arith.ll
-
f128-bitcast.ll
-
f128-compare.ll
-
f128-conv.ll
-
f128-fma.ll
-
f128-passByValue.ll
-
f128-truncateNconv.ll
-
fma-aggr-FMF.ll
-
fma-combine.ll
-
fmf-propagation.ll
-
fold-rlwinm-1.ll
-
fp-classify.ll
-
fp-strict-conv-f128.ll
-
fp-strict-conv.ll
-
fp-strict-fcmp.ll
-
fp-strict-round.ll
-
fp-strict.ll
-
fp128-bitcast-after-operation.ll
-
fpscr-intrinsics.ll
-
frounds.ll
-
funnel-shift-rot.ll
-
funnel-shift.ll
-
handle-f16-storage-type.ll
-
huge-frame-call.ll
-
huge-frame-size.ll
1/1
int128_ldst.ll
-
legalize-vaarg.ll
-
licm-remat.ll
-
licm-tocReg.ll
-
load-and-splat.ll
-
load-shuffle-and-shuffle-store.ll
-
memCmpUsedInZeroEqualityComparison.ll
-
memcmp.ll
-
memset-tail.ll
-
mergeable-string-pool-large.ll
-
mergeable-string-pool.ll
-
mma-acc-memops.ll
-
mulld.ll
-
no-ctr-loop-if-exit-in-nested-loop.ll
-
non-debug-mi-search-frspxsrsp.ll
-
p8-isel-sched.ll
-
p8-scalar_vector_conversions.ll
-
peephole-align.ll
-
pow-025-075-intrinsic-scalar-mass-fast.ll
-
ppc-32bit-build-vector.ll
-
ppc-clear-before-return.ll
-
ppc-ctr-dead-code.ll
-
ppc-rotate-clear.ll
-
ppc-shrink-wrapping.ll
-
ppc64-P9-setb.ll
-
ppc64-P9-vabsd.ll
-
ppc64-byval-larger-struct.ll
-
ppc64-byval-multi-store.ll
-
ppc64-rop-protection-aix.ll
-
ppc64-rop-protection.ll
-
ppc64-varargs.ll
-
ppcf128-constrained-fp-intrinsics.ll
-
pr25080.ll
-
pr27078.ll
-
pr33093.ll
-
pr33547.ll
-
pr35402.ll
-
pr36292.ll
-
pr45628.ll
-
pr46759.ll
-
pr47707.ll
-
pr47830.ll
-
pr47891.ll
-
pr48388.ll
-
pr48519.ll
-
pr48527.ll
-
pr52894-32bit.ll
-
pr52894.ll
-
pr61882.ll
-
recipest.ll
-
reduce_scalarization.ll
-
register-pressure-reduction.ll
-
repeated-fp-divisors.ll
-
saddo-ssubo.ll
-
sat-add.ll
-
scalar-double-ldst.ll
-
scalar-equal.ll
-
scalar-float-ldst.ll
-
scalar-i16-ldst.ll
-
scalar-i32-ldst.ll
-
scalar-i64-ldst.ll
-
scalar-i8-ldst.ll
-
scalar_cmp.ll
-
scalar_vector_test_4.ll
-
scalars-in-altivec-regs.ll
-
scheduling-mem-dependency.ll
-
select-constant-xor.ll
-
select.ll
-
select_const.ll
-
setcc-logic.ll
-
setcc-vector.ll
-
sext-vector-inreg.ll
-
sign-ext-atomics.ll
-
signbit-shift.ll
-
sms-remark.ll
-
srem-seteq-illegal-types.ll
-
srem-vector-lkk.ll
-
stack-clash-dynamic-alloca.ll
-
stack-clash-prologue.ll
-
stack-restore-with-setjmp.ll
-
store-constant.ll
-
stwu-sched.ll
-
swap-reduction.ll
-
swaps-le-5.ll
-
swaps-le-6.ll
-
swaps-le-7.ll
-
test-vector-insert.ll
-
testBitReverse.ll
-
testComparesi32gtu.ll
1/1
testComparesi32ltu.ll
-
testComparesieqsc.ll
-
testComparesieqsi.ll
-
testComparesieqsll.ll
-
testComparesieqss.ll
-
testComparesiequc.ll
-
testComparesiequi.ll
-
testComparesiequll.ll
-
testComparesiequs.ll
-
testComparesigesc.ll
-
testComparesigesi.ll
-
testComparesigesll.ll
-
testComparesigess.ll
-
testComparesigeuc.ll
-
testComparesigeui.ll
-
testComparesigeull.ll
-
testComparesigeus.ll
-
testComparesigtsc.ll
-
testComparesigtsi.ll
-
testComparesigtsll.ll
-
testComparesigtss.ll
-
testComparesigtuc.ll
-
testComparesigtui.ll
-
testComparesigtus.ll
-
testComparesilesc.ll
-
testComparesilesi.ll
-
testComparesilesll.ll
-
testComparesiless.ll
-
testComparesileuc.ll
-
testComparesileui.ll
-
testComparesileull.ll
-
testComparesileus.ll
-
testComparesiltsc.ll
-
testComparesiltsi.ll
-
testComparesiltsll.ll
-
testComparesiltss.ll
-
testComparesiltuc.ll
-
testComparesiltui.ll
-
testComparesiltus.ll
-
testComparesinesc.ll
-
testComparesinesi.ll
-
testComparesinesll.ll
-
testComparesiness.ll
-
testComparesineuc.ll
-
testComparesineui.ll
-
testComparesineull.ll
-
testComparesineus.ll
-
testCompareslleqsc.ll
-
testCompareslleqsi.ll
-
testCompareslleqsll.ll
-
testCompareslleqss.ll
-
testComparesllequc.ll
-
testComparesllequi.ll
-
testComparesllequll.ll
-
testComparesllequs.ll
-
testComparesllgesc.ll
-
testComparesllgesi.ll
-
testComparesllgesll.ll
-
testComparesllgess.ll
-
testComparesllgeuc.ll
-
testComparesllgeui.ll
-
testComparesllgeull.ll
-
testComparesllgeus.ll
-
testComparesllgtsll.ll
-
testComparesllgtuc.ll
-
testComparesllgtui.ll
-
testComparesllgtus.ll
-
testCompareslllesc.ll
-
testCompareslllesi.ll
-
testCompareslllesll.ll
-
testComparesllless.ll
-
testComparesllleuc.ll
-
testComparesllleui.ll
-
testComparesllleull.ll
-
testComparesllleus.ll
-
testComparesllltsll.ll
-
testComparesllltuc.ll
-
testComparesllltui.ll
-
testComparesllltus.ll
-
testComparesllnesll.ll
-
testComparesllneull.ll
-
tls-pie-xform.ll
-
toc-float.ll
-
tocSaveInPrologue.ll
-
uint-to-fp-v4i32.ll
-
urem-seteq-illegal-types.ll
-
urem-vector-lkk.ll
-
v16i8_scalar_to_vector_shuffle.ll
-
v2i64_scalar_to_vector_shuffle.ll
-
v4i32_scalar_to_vector_shuffle.ll
-
v8i16_scalar_to_vector_shuffle.ll
-
variable_elem_vec_extracts.ll
-
vavg.ll
-
vec-itofp.ll
-
vec-min-max.ll
-
vec-promote.ll
-
vec-trunc.ll
-
vec-trunc2.ll
-
vec_conv_fp32_to_i16_elts.ll
-
vec_conv_fp32_to_i64_elts.ll
-
vec_conv_fp32_to_i8_elts.ll
-
vec_conv_fp64_to_i16_elts.ll
-
vec_conv_fp64_to_i32_elts.ll
-
vec_conv_fp64_to_i8_elts.ll
-
vec_conv_fp_to_i_4byte_elts.ll
-
vec_conv_fp_to_i_8byte_elts.ll
-
vec_conv_i16_to_fp32_elts.ll
-
vec_conv_i16_to_fp64_elts.ll
-
vec_conv_i32_to_fp64_elts.ll
-
vec_conv_i64_to_fp32_elts.ll
-
vec_conv_i8_to_fp32_elts.ll
-
vec_conv_i8_to_fp64_elts.ll
-
vec_conv_i_to_fp_4byte_elts.ll
-
vec_conv_i_to_fp_8byte_elts.ll
-
vec_insert_elt.ll
-
vec_select.ll
-
vec_shuffle_p8vector_le.ll
-
vector-constrained-fp-intrinsics.ll
-
vector-ldst.ll
-
vperm-swap.ll
-
vselect-constants.ll
-
vsx.ll
-
wide-scalar-shift-by-byte-multiple-legalization.ll
-
wide-scalar-shift-legalization.ll

Differential D154488

[PowerPC] Define SchedModel for Power8
ClosedPublic

Authored by qiucf on Jul 5 2023, 2:56 AM.

Download Raw Diff

Details

Reviewers

nemanjai
shchenz

Group Reviewers

Restricted Project

Commits

rGb922a3621116: [PowerPC] Define SchedModel for Power8

Summary

PowerPC subtargets prior to Power9 use the 'legacy' itinerary way to provide scheduling information. This patch re-writes the tablegen file to define the scheduling information in the new SchedModel way. This can bring improvements to some benchmarks.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

qiucf created this revision.Jul 5 2023, 2:56 AM

Herald added a project: Restricted Project. · View Herald TranscriptJul 5 2023, 2:56 AM

Herald added subscribers: steven.zhang, asbirlea, kbarton, hiraditya. · View Herald Transcript

qiucf requested review of this revision.Jul 5 2023, 2:56 AM

Herald added a project: Restricted Project. · View Herald TranscriptJul 5 2023, 2:56 AM

Herald added a subscriber: llvm-commits. · View Herald Transcript

Harbormaster completed remote builds in B243163: Diff 537275.Jul 5 2023, 4:08 AM

Thanks for the big effort.
I think moving from old ProcessorItineraries form to new ProcessorModel form should be correct. Some comments from me. And also please note that there are some failures in the pre-merge checks.

llvm/lib/Target/PowerPC/PPCScheduleP8.td
18	Why is `MicroOpBufferSize` 60? The pwr8 UM seems to show this value as 32 * 2?
19	We need a comment here explaining why this field is set to 0 on pwr8.
47	minor: _1, _2, _4 does not sound like a good name...
48	Why this unit number for type "2xdouble" is set to 4? Its parent `P8_VMX` only has 2.
61	Same with the name issue _1, ..._6.
llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll
244	Seems positive, now we use less CSRs.
llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll
31	low ILP but less registers?
llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll
30	lower ILP with less register, I think it is fine only if we get better perf in benchmarks.
llvm/test/CodeGen/PowerPC/atomics-i128.ll
510	negative change, now we have more `mr` which is not free on pwr8.
llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
184	We may need another name for check prefix `PREP10`? And also double check if prefixes `P8` and `P9` are still used in this file?
4519	Ah, it shows P8 and P9 are not used; we should delete them from the run lines.
llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll
4779	Ditto
llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll
4219	Ditto
llvm/test/CodeGen/PowerPC/atomics-regression.ll
456	Aren't the atomic-related instructions barriers to the scheduler? Can you check why there are changes around them? Thanks.
llvm/test/CodeGen/PowerPC/bool-math.ll
120	Weird. Now there is no check for this function?
llvm/test/CodeGen/PowerPC/branch_coalesce.ll
18	lower ILP and less registers
llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll
27	Positive
llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll
35	Positive
llvm/test/CodeGen/PowerPC/int128_ldst.ll
1100	Same with above
llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll
37	This looks positive. We had a request to move the lr store instruction be away from `mflr`

qiucf updated this revision to Diff 543374.Jul 23 2023, 10:08 PM

qiucf marked 5 inline comments as done.

Herald added a subscriber: qcolombet. · View Herald TranscriptJul 23 2023, 10:08 PM

Harbormaster completed remote builds in B247560: Diff 543374.Jul 23 2023, 11:41 PM

Please note the 2 failures on two different bots in the premerge test.

llvm/lib/Target/PowerPC/PPCScheduleP8.td
32	Sorry, this still confuses me. The super is `P8_FPU`, and `P8_FPU` has 4 execution units which can be executed as 2-way SIMD operations for double. So why do we set the ProcResource value to 4 here?
33	Setting `P8_VMX` as super of "4xSingle" also seems weird. In ISA of pwr8, I think most instructions that handle 4xSingle type are VSX related and from the UM "10.3.2 Instructional Latencies and Throughputs", I saw most of them are using pipeline `FPU`. So I guess, we should use `P8_FPU` as Super instead?
llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
4517	There are still unused check prefix in the run lines.
llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll
4777	Please delete the unused check prefixes, so that we won't have this note.
llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll
4219	Please try to remove this NOTE.

qiucf updated this revision to Diff 543788.Jul 24 2023, 7:27 PM

qiucf marked 10 inline comments as done.

qiucf added inline comments.

llvm/lib/Target/PowerPC/PPCScheduleP8.td
32	Thanks, updated.
33	2.1.3 Speculative Superscalar Inner Core Organization: Two VMX execution units capable of executing simple FX, permute, complex FX, and 4-way SIMD single-precision floating-point operations I think the reason is before VSX, Altivec already had instructions for 4xSingle vectors and they are implemented within VMX units. So their VSX equivalents uses the same execution units.
48	Its parent `P8_FPU` has 4 units.
llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll
184	Like `CHECK-NONP10` or `CHECK-P8P9`? But it actually has the same meaning as `CHECK-PREP10`.
llvm/test/CodeGen/PowerPC/bool-math.ll
120	Because the test RUN line did not specify `-mcpu`, so that Linux uses pwr8 while AIX uses pwr7. Fixed.

Harbormaster completed remote builds in B247852: Diff 543788.Jul 24 2023, 7:28 PM

qiucf added inline comments.Jul 25 2023, 1:11 AM

llvm/test/CodeGen/PowerPC/atomics-i128.ll
510	The BB with negative result is quite simple: SU(0): %29:gprc = COPY $r3 SU(1): %34:gprc = LWZ 12, %stack.0 SU(2): %33:gprc = LWZ 8, %stack.0 SU(3): %32:gprc = LWZ 4, %stack.0 SU(4): %31:gprc = LWZ 0, %stack.0 SU(5): %30:crrc = CMPLWI %29:gprc, 0 The old scheduling model puts `SU(5)` just after `SU(0)`, so that `%29:gprc` is assigned with `r3`. With the new scheduling model, the BB is unchanged after pre-isel scheduling. I think it's more likely an issue of the scheduler exposed by new model and I'll further look at why regpressure is not respected here. But here I'm curious why the register allocator not assign `r3` to `%29`.

qiucf added inline comments.Jul 25 2023, 1:28 AM

llvm/test/CodeGen/PowerPC/atomics-regression.ll
456	`lwsync` (`SYNC`) is treated as global memory barrier. But `clrlwi` (`RLWINM`) neither writes nor reads any memory, so scheduling such instruction around SYNC is okay. BTW, the new codegen is the original order.

I'd like to give extra comments on some definitions:

2.1.3 Speculative Superscalar Inner Core Organization
Out-of-order issue of up to 10 operations into the following 10 issue ports:
– Two ports to do loads or fixed-point operations.
– Two ports to do stores, fixed-point loads, or fixed-point operations.
– Two fixed-point operations
– Two issue ports shared by two floating-point, two VSX, two VMX, one crypto, and one DFP operations
– One branch operation
– One condition register operation

Sixteen execution units:
– Two symmetric load/store units (LSU), capable of executing stores, fixed-point loads, and simple fixed-point operations
– Two load-only units (LU) also capable of executing simple fixed-point operations
– Two symmetric fixed-point units (FXU)
– Four floating-point units (FPU), implemented as two 2-way SIMD operations for double- and single-precision. Scalar binary floating-point instructions can only use two FPUs.
– Two VMX execution units capable of executing simple FX, permute, complex FX, and 4-way SIMD single-precision floating-point operations
– One Crypto unit
– One decimal floating-point unit (DFU)
– One branch execution unit (BR)
– One condition register logical execution unit (CRL)

So the corresponding units and ports are defined. But:

For ports, we may need to 'combine' them. For example, port group 1, 2, 3 are all acceptable for fixed-point operations. So we need to create a 'parent' group for fixed-point ops and make group 1, 2, 3 all its children. Similar rule applies for fixed-point loads.
Some instructions use 'LSU or FXU', so a similar parent-child definition is also needed.

Compared to the Power9 model, dispatch rules and pipeline forwarding definitions are missing in this patch, because they're vague in the Power8 manual.

shchenz added inline comments.Aug 1 2023, 10:32 PM

llvm/lib/Target/PowerPC/PPCScheduleP8.td
12–13	This seems like resources for issue not dispatch?
28	Don't know how do you arrange these instructions. But from the UM: `vaddfp` pipeline is `FPU` that should be using `P8_FP_4x32` and the execution unit should be `P8_FPU`?
33	I believe we should use `P8_VSX` as super of `P8_FP_4x32`. According to the instructions which uses `P8_FP_4x32`, all of them are with pipeline `FPU` that should be for the 4 FPU units. Or we can create two P8_FP_4x32 resources, one is child of `P8_FPU` and one is child of `P8_VMX`. However the child of `P8_VMX` seems have no use instructions in the "Instruction Latencies and Throughputs" sheet.
89	An instruction like `STB` seems to occupy two pipelines, `LSU` and `LU`, while here it seems to occupy only the `LSU` pipeline?

Rename P8_DISP to P8_ISSUE
Add scheduling info for DFP instructions
Mark resource usage of 4-way SIMD FP instructions from VMX to FPU
Add resource usage of FXU to basic load instructions

llvm/lib/Target/PowerPC/PPCScheduleP8.td
89	It occupies both: def P8_LS_LU : SchedWriteRes<[P8_LU, P8_LS]>;

Harbormaster completed remote builds in B249683: Diff 546332.Aug 2 2023, 1:21 AM

Gentle ping

Looks almost good to me. Some other comments related to the instruction definitions.

Please note the two failures in the premerge check on X86 target.

llvm/lib/Target/PowerPC/PPCScheduleP8.td
68	nit: This issue port is not used?
88	should be `def P8_LS_FP : SchedWriteRes<[P8_LSU, P8_FPU]>;`?
169	For cracked instructions, should we only use one issue port?
171	ADD_rec/SUBF_rec/NEG_rec seem to be able to use either FXU or LSU?

qiucf updated this revision to Diff 555300.Sep 1 2023, 1:34 AM

qiucf marked 3 inline comments as done.

qiucf added inline comments.

llvm/lib/Target/PowerPC/PPCScheduleP8.td
169	I tried modelling cracked instructions but did not get good performance results; this can be a future improvement item. I added a `TODO` note.
171	Thanks, moved to use only FX instead.

Harbormaster completed remote builds in B256208: Diff 555300.Sep 1 2023, 2:20 AM

shchenz added inline comments.Sep 1 2023, 3:48 AM

llvm/lib/Target/PowerPC/PPCScheduleP8.td
210	hmm, this change seems still not accurate, below is from PWR8 UM(17 March 2016) addi addis add add. subf subf. addic subfic adde addme subfme subfze neg neg. nego FXU (or LU or LSU for non-dot forms) `add.` should be able to use FXU or LU (4 hardware units). Maybe we need to define a new unit group like `P8_FX_LU` for this kind of instructions? I suggest you check other instructions as well compared with the UM. Thanks very much for the big effort.

qiucf added inline comments.Sep 3 2023, 10:03 PM

llvm/lib/Target/PowerPC/PPCScheduleP8.td
210	Per my understanding, only non-dot forms use LU/LSU?

shchenz added inline comments.Sep 4 2023, 2:06 AM

llvm/lib/Target/PowerPC/PPCScheduleP8.td
210	OK, so we have different understanding about `FXU (or LU or LSU for non-dot forms)`. Let us use yours for now until we find a clear doc about what units dot form instructions can use.
224	It seems I cannot find `mflr`; is that expected?
244	lswi lswx stswi stswx (unaligned) LSU,LU lwarx ldarx LSU or LU

CRBit instructions are not defined in User Manual, remove
Fixed point compare use FX instead of CR
Reorder and rename definitions to make it cleaner
Mark tw instructions as using P8_FX_NONE
Leave TODO note in logical instructions, since the modified codegen is suspicious

Harbormaster completed remote builds in B256836: Diff 556226.Sep 7 2023, 9:08 PM

qiucf added inline comments.Sep 7 2023, 9:14 PM

llvm/lib/Target/PowerPC/PPCScheduleP8.td
244	lswi (naturally aligned) uses LSU or LU lswi lswx stswi stswx (unaligned) uses LSU,LU stswi (naturally aligned) uses LSU, LU We can't differentiate whether it's aligned or not, assume naturally aligned here.

LGTM. Thanks very much again for the very big effort.

llvm/lib/Target/PowerPC/PPCScheduleP8.td
144	I really like the idea of grouping the instructions by pipeline. It will make later changes to this model much easier.

This revision is now accepted and ready to land.Sep 8 2023, 12:04 AM

This revision was landed with ongoing or failed builds.Sep 8 2023, 12:45 AM

Closed by commit rGb922a3621116: [PowerPC] Define SchedModel for Power8 (authored by qiucf). · Explain Why

This revision was automatically updated to reflect the committed changes.

qiucf added a commit: rGb922a3621116: [PowerPC] Define SchedModel for Power8.

Large Diff

This large diff affects 283 files. Files without inline comments have been collapsed. Expand All Files

Revision Contents

Path

Size

llvm/

lib/

Target/

PowerPC/

PPCScheduleP8.td

716 lines

test/

CodeGen/

PowerPC/

2006-07-07-ComputeMaskedBits.ll

6 lines

BreakableToken-reduced.ll

48 lines

CSR-fit.ll

4 lines

CompareEliminationSpillIssue.ll

2 lines

P10-stack-alignment.ll

23 lines

PR35812-neg-cmpxchg.ll

2 lines

VSX-XForm-Scalars.ll

1 line

aix-dfltabi-rsrvd-reg.ll

18 lines

aix-vsx-splatimm.ll

10 lines

aix32-p8-scalar_vector_conversions.ll

30 lines

aix_scalar_vector_permuted.ll

10 lines

all-atomics.ll

168 lines

and-extend-combine.ll

8 lines

6 lines

36 lines

224 lines

432 lines

454 lines

465 lines

404 lines

atomics-regression.ll

472 lines

bool-math.ll

6 lines

branch_coalesce.ll

4 lines

build-vector-tests.ll

340 lines

builtins-ppc-p8vector.ll

12 lines

canonical-merge-shuffles.ll

96 lines

cfence-float.ll

2 lines

coldcc2.ll

4 lines

combine-fneg.ll

4 lines

combine-sext-and-shl-after-isel.ll

4 lines

combine_ext_trunc.ll

8 lines

const-nonsplat-array-init.ll

172 lines

const-splat-array-init.ll

20 lines

constant-combines.ll

8 lines

crypto_bifs_be.ll

56 lines

csr-split.ll

8 lines

ctrloop-constrained-fp.ll

12 lines

ctrloop-fp128.ll

28 lines

cxx_tlscc64.ll

2 lines

disable-ctr-ppcf128.ll

6 lines

elf64-byval-cc.ll

34 lines

extra-toc-reg-deps.ll

4 lines

116 lines

26 lines

31 lines

4 lines

130 lines

142 lines

64 lines

28 lines

f128-truncateNconv.ll

214 lines

8 lines

116 lines

60 lines

10 lines

8 lines

fp-strict-conv-f128.ll

30 lines

16 lines

336 lines

16 lines

48 lines

fp128-bitcast-after-operation.ll

22 lines

4 lines

16 lines

62 lines

107 lines

handle-f16-storage-type.ll

33 lines

6 lines

10 lines

558 lines

14 lines

4 lines

35 lines

52 lines

load-shuffle-and-shuffle-store.ll

80 lines

memCmpUsedInZeroEqualityComparison.ll

20 lines

memcmp.ll

8 lines

memset-tail.ll

32 lines

mergeable-string-pool-large.ll

390 lines

mergeable-string-pool.ll

183 lines

mma-acc-memops.ll

194 lines

mulld.ll

2 lines

no-ctr-loop-if-exit-in-nested-loop.ll

31 lines

non-debug-mi-search-frspxsrsp.ll

16 lines

p8-isel-sched.ll

4 lines

p8-scalar_vector_conversions.ll

210 lines

peephole-align.ll

14 lines

pow-025-075-intrinsic-scalar-mass-fast.ll

118 lines

ppc-32bit-build-vector.ll

14 lines

ppc-clear-before-return.ll

85 lines

ppc-ctr-dead-code.ll

70 lines

ppc-rotate-clear.ll

175 lines

ppc-shrink-wrapping.ll

4 lines

ppc64-P9-setb.ll

458 lines

ppc64-P9-vabsd.ll

498 lines

ppc64-byval-larger-struct.ll

114 lines

ppc64-byval-multi-store.ll

34 lines

ppc64-rop-protection-aix.ll

488 lines

ppc64-rop-protection.ll

570 lines

ppc64-varargs.ll

6 lines

ppcf128-constrained-fp-intrinsics.ll

16 lines

104 lines

20 lines

112 lines

6 lines

10 lines

2 lines

62 lines

18 lines

7 lines

2 lines

78 lines

32 lines

4 lines

4 lines

12 lines

24 lines

2 lines

120 lines

reduce_scalarization.ll

22 lines

12 lines

repeated-fp-divisors.ll

28 lines

saddo-ssubo.ll

16 lines

sat-add.ll

362 lines

scalar-double-ldst.ll

570 lines

25 lines

561 lines

654 lines

714 lines

673 lines

614 lines

188 lines

scalar_vector_test_4.ll

130 lines

scalars-in-altivec-regs.ll

16 lines

scheduling-mem-dependency.ll

6 lines

select-constant-xor.ll

8 lines

28 lines

228 lines

40 lines

6 lines

2 lines

2 lines

14 lines

4 lines

srem-seteq-illegal-types.ll

104 lines

srem-vector-lkk.ll

1034 lines

stack-clash-dynamic-alloca.ll

44 lines

stack-clash-prologue.ll

24 lines

stack-restore-with-setjmp.ll

9 lines

132 lines

4 lines

40 lines

8 lines

12 lines

4 lines

test-vector-insert.ll

48 lines

testBitReverse.ll

112 lines

testComparesi32gtu.ll

16 lines

testComparesi32ltu.ll

16 lines

testComparesieqsc.ll

16 lines

testComparesieqsi.ll

16 lines

testComparesieqsll.ll

16 lines

testComparesieqss.ll

16 lines

testComparesiequc.ll

16 lines

testComparesiequi.ll

16 lines

testComparesiequll.ll

16 lines

testComparesiequs.ll

16 lines

testComparesigesc.ll

16 lines

testComparesigesi.ll

16 lines

testComparesigesll.ll

56 lines

testComparesigess.ll

16 lines

testComparesigeuc.ll

8 lines

testComparesigeui.ll

8 lines

testComparesigeull.ll

8 lines

testComparesigeus.ll

8 lines

testComparesigtsc.ll

12 lines

testComparesigtsi.ll

12 lines

testComparesigtsll.ll

32 lines

12 lines

18 lines

18 lines

18 lines

16 lines

16 lines

testComparesilesll.ll

80 lines

testComparesiless.ll

16 lines

testComparesileuc.ll

8 lines

testComparesileui.ll

8 lines

testComparesileull.ll

8 lines

testComparesileus.ll

8 lines

testComparesiltsc.ll

10 lines

testComparesiltsi.ll

10 lines

testComparesiltsll.ll

22 lines

10 lines

8 lines

8 lines

8 lines

16 lines

16 lines

testComparesinesll.ll

24 lines

testComparesiness.ll

16 lines

testComparesineuc.ll

16 lines

testComparesineui.ll

16 lines

testComparesineull.ll

24 lines

testComparesineus.ll

16 lines

testCompareslleqsc.ll

16 lines

testCompareslleqsi.ll

16 lines

testCompareslleqsll.ll

16 lines

testCompareslleqss.ll

16 lines

testComparesllequc.ll

16 lines

testComparesllequi.ll

16 lines

testComparesllequll.ll

16 lines

testComparesllequs.ll

16 lines

testComparesllgesc.ll

16 lines

testComparesllgesi.ll

16 lines

testComparesllgesll.ll

56 lines

testComparesllgess.ll

16 lines

testComparesllgeuc.ll

8 lines

testComparesllgeui.ll

8 lines

testComparesllgeull.ll

8 lines

testComparesllgeus.ll

8 lines

testComparesllgtsll.ll

32 lines

testComparesllgtuc.ll

18 lines

testComparesllgtui.ll

18 lines

testComparesllgtus.ll

18 lines

testCompareslllesc.ll

16 lines

testCompareslllesi.ll

16 lines

testCompareslllesll.ll

80 lines

testComparesllless.ll

16 lines

testComparesllleuc.ll

8 lines

testComparesllleui.ll

8 lines

testComparesllleull.ll

8 lines

testComparesllleus.ll

8 lines

testComparesllltsll.ll

22 lines

testComparesllltuc.ll

8 lines

testComparesllltui.ll

12 lines

testComparesllltus.ll

8 lines

testComparesllnesll.ll

24 lines

testComparesllneull.ll

24 lines

18 lines

12 lines

4 lines

10 lines

urem-seteq-illegal-types.ll

116 lines

urem-vector-lkk.ll

680 lines

v16i8_scalar_to_vector_shuffle.ll

322 lines

v2i64_scalar_to_vector_shuffle.ll

258 lines

v4i32_scalar_to_vector_shuffle.ll

286 lines

v8i16_scalar_to_vector_shuffle.ll

334 lines

variable_elem_vec_extracts.ll

8 lines

12 lines

158 lines

6 lines

64 lines

16 lines

40 lines

vec_conv_fp32_to_i16_elts.ll

500 lines

vec_conv_fp32_to_i64_elts.ll

216 lines

vec_conv_fp32_to_i8_elts.ll

508 lines

vec_conv_fp64_to_i16_elts.ll

436 lines

vec_conv_fp64_to_i32_elts.ll

184 lines

vec_conv_fp64_to_i8_elts.ll

432 lines

vec_conv_fp_to_i_4byte_elts.ll

28 lines

vec_conv_fp_to_i_8byte_elts.ll

104 lines

vec_conv_i16_to_fp32_elts.ll

128 lines

vec_conv_i16_to_fp64_elts.ll

368 lines

vec_conv_i32_to_fp64_elts.ll

204 lines

vec_conv_i64_to_fp32_elts.ll

232 lines

vec_conv_i8_to_fp32_elts.ll

166 lines

vec_conv_i8_to_fp64_elts.ll

394 lines

vec_conv_i_to_fp_4byte_elts.ll

28 lines

vec_conv_i_to_fp_8byte_elts.ll

104 lines

vec_insert_elt.ll

274 lines

vec_select.ll

30 lines

vec_shuffle_p8vector_le.ll

4 lines

vector-constrained-fp-intrinsics.ll

1329 lines

34 lines

8 lines

54 lines

10 lines

wide-scalar-shift-by-byte-multiple-legalization.ll

134 lines

wide-scalar-shift-legalization.ll

242 lines

Diff 556234

llvm/lib/Target/PowerPC/PPCScheduleP8.td

	//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---- tablegen --===//			//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---- tablegen --===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//
	//			//
	// This file defines the itinerary class data for the POWER8 processor.			// This file defines the SchedModel for the POWER8 processor.
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	// Scheduling for the P8 involves tracking two types of resources:
	// 1. The dispatch bundle slots
	// 2. The functional unit resources

	// Dispatch units:
	def P8_DU1 : FuncUnit;
	def P8_DU2 : FuncUnit;
	def P8_DU3 : FuncUnit;
	def P8_DU4 : FuncUnit;
	def P8_DU5 : FuncUnit;
	def P8_DU6 : FuncUnit;
	def P8_DU7 : FuncUnit; // Only branch instructions will use DU7,DU8
	def P8_DU8 : FuncUnit;

	// 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).

	def P8_LU1 : FuncUnit; // Loads or fixed-point operations 1
	def P8_LU2 : FuncUnit; // Loads or fixed-point operations 2

	// Load/Store pipelines can handle Stores, fixed-point loads, and simple
	// fixed-point operations.
	def P8_LSU1 : FuncUnit; // Load/Store pipeline 1
	def P8_LSU2 : FuncUnit; // Load/Store pipeline 2

	// Fixed Point unit
	def P8_FXU1 : FuncUnit; // FX pipeline 1
	def P8_FXU2 : FuncUnit; // FX pipeline 2

	// The Floating-Point Unit (FPU) and Vector Media Extension (VMX) units
	// are combined on P7 and newer into a Vector Scalar Unit (VSU).
	// The P8 Instruction latency documents still refers to the unit as the
	// FPU, so keep in mind that FPU==VSU.
	// In contrast to the P7, the VMX units on P8 are symmetric, so no need to
	// split vector integer ops or 128-bit load/store/perms to the specific units.
	def P8_FPU1 : FuncUnit; // VS pipeline 1
	def P8_FPU2 : FuncUnit; // VS pipeline 2

	def P8_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
	def P8_BRU : FuncUnit; // BR unit

	def P8Itineraries : ProcessorItineraries<
	[P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
	P8_LU1, P8_LU2, P8_LSU1, P8_LSU2, P8_FXU1, P8_FXU2,
	P8_FPU1, P8_FPU2, P8_CRU, P8_BRU], [], [
	InstrItinData<IIC_IntSimple , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2,
	P8_LU1, P8_LU2,
	P8_LSU1, P8_LSU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2, P8_LU1,
	P8_LU2, P8_LSU1, P8_LSU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntISEL, [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
	InstrStage<1, [P8_BRU]>],
	[1, 1, 1, 1]>,
	InstrItinData<IIC_IntCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntDivW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<15, [P8_FXU1, P8_FXU2]>],
	[15, 1, 1]>,
	InstrItinData<IIC_IntDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<23, [P8_FXU1, P8_FXU2]>],
	[23, 1, 1]>,
	InstrItinData<IIC_IntMulHW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 1, 1]>,
	InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 1, 1]>,
	InstrItinData<IIC_IntMulHD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 1, 1]>,
	InstrItinData<IIC_IntMulLI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 1, 1]>,
	InstrItinData<IIC_IntRotate , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntRotateD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntRotateDI , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntShift , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_IntTrapW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1]>,
	InstrItinData<IIC_IntTrapD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[1, 1]>,
	InstrItinData<IIC_BrB , [InstrStage<1, [P8_DU7, P8_DU8], 0>,
	InstrStage<1, [P8_BRU]>],
	[3, 1, 1]>,
	// FIXME - the Br* groups below are not branch related, so should probably
	// be renamed.
	// IIC_BrCR consists of the cr* instructions. (crand,crnor,creqv, etc).
	// and should be 'First' in dispatch.
	InstrItinData<IIC_BrCR , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_CRU]>],
	[3, 1, 1]>,
	// IIC_BrMCR consists of the mcrf instruction.
	InstrItinData<IIC_BrMCR , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_CRU]>],
	[3, 1, 1]>,
	// IIC_BrMCRX consists of mcrxr (obsolete instruction) and mtcrf, which
	// should be first in the dispatch group.
	InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 1, 1]>,
	// NOTE(review): this is a second entry for IIC_BrMCRX; the entry directly
	// before it uses the same stages with operand cycles [3, 1, 1]. Confirm
	// whether a different itinerary class was intended here.
	InstrItinData<IIC_BrMCRX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 1]>,
	InstrItinData<IIC_LdStLoad , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2]>],
	[2, 1, 1]>,
	InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2 ], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[2, 2, 1, 1]>,
	// Update-Indexed form loads/stores are no longer first and last in the
	// dispatch group. They are simply cracked, so require DU1,DU2.
	InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 3, 1, 1]>,
	InstrItinData<IIC_LdStLD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2]>],
	[2, 1, 1]>,
	InstrItinData<IIC_LdStLDU , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[2, 2, 1, 1]>,
	InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 3, 1, 1]>,
	InstrItinData<IIC_LdStLFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LU1, P8_LU2]>],
	[3, 1, 1]>,
	InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LU1, P8_LU2]>],
	[3, 1, 1]>,
	InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 3, 1, 1]>,
	InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 3, 1, 1]>,
	InstrItinData<IIC_LdStLHA , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2,
	P8_LU1, P8_LU2]>],
	[3, 1, 1]>,
	InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 4, 1, 1]>,
	// first+last in dispatch group.
	InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_DU3], 0>,
	InstrStage<1, [P8_DU4], 0>,
	InstrStage<1, [P8_DU5], 0>,
	InstrStage<1, [P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 4, 1, 1]>,
	InstrItinData<IIC_LdStLWA , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2]>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[3, 1, 1]>,
	InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_DU3], 0>,
	InstrStage<1, [P8_DU4], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2]>],
	[3, 1, 1]>,
	// first+last
	InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_DU3], 0>,
	InstrStage<1, [P8_DU4], 0>,
	InstrStage<1, [P8_DU5], 0>,
	InstrStage<1, [P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2]>],
	[3, 1, 1]>,
	InstrItinData<IIC_LdStLMW , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2,
	P8_LU1, P8_LU2]>],
	[2, 1, 1]>,
	// Stores are dual-issued from the issue queue, so may only take up one
	// dispatch slot. The instruction will be broken into two IOPS. The agen
	// op is issued to the LSU, and the data op (register fetch) is issued
	// to either the LU (GPR store) or the VSU (FPR store).
	InstrItinData<IIC_LdStStore , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2]>,
	InstrStage<1, [P8_LU1, P8_LU2]>],
	[1, 1, 1]>,
	// IIC_LdStSTD: one dispatch stage on any of DU1-DU6, then one stage on any
	// of LU1/LU2/LSU1/LSU2, with operand cycles [1, 1, 1].
	// The comma after the stage list is required: without it the bracketed
	// list that follows is parsed as a slice suffix on the stage list instead
	// of being passed as the operand-cycle argument.
	InstrItinData<IIC_LdStSTD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LU1, P8_LU2,
	P8_LSU1, P8_LSU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_LdStSTU , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LU1, P8_LU2,
	P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[2, 1, 1, 1]>,
	// First+last
	InstrItinData<IIC_LdStSTUX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_DU3], 0>,
	InstrStage<1, [P8_DU4], 0>,
	InstrStage<1, [P8_DU5], 0>,
	InstrStage<1, [P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[2, 1, 1, 1]>,
	InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[2, 1, 1, 1]>,
	InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_DU3], 0>,
	InstrStage<1, [P8_DU4], 0>,
	InstrStage<1, [P8_DU5], 0>,
	InstrStage<1, [P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_LU1, P8_LU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_DU2], 0>,
	InstrStage<1, [P8_DU3], 0>,
	InstrStage<1, [P8_DU4], 0>,
	InstrStage<1, [P8_DU5], 0>,
	InstrStage<1, [P8_DU6], 0>,
	InstrStage<1, [P8_LSU1, P8_LSU2], 0>,
	InstrStage<1, [P8_LU1, P8_LU2]>],
	[1, 1, 1]>,
	InstrItinData<IIC_SprMFCR , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_CRU]>],
	[6, 1]>,
	InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_CRU]>],
	[3, 1]>,
	InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FXU1, P8_FXU2]>],
	[4, 1]>, // mtctr
	InstrItinData<IIC_FPGeneral , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[5, 1, 1]>,
	InstrItinData<IIC_FPAddSub , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[5, 1, 1]>,
	InstrItinData<IIC_FPCompare , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[8, 1, 1]>,
	InstrItinData<IIC_FPDivD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[33, 1, 1]>,
	InstrItinData<IIC_FPDivS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[27, 1, 1]>,
	InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[44, 1, 1]>,
	InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[32, 1, 1]>,
	InstrItinData<IIC_FPFused , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[5, 1, 1, 1]>,
	InstrItinData<IIC_FPRes , [InstrStage<1, [P8_DU1, P8_DU2, P8_DU3,
	P8_DU4, P8_DU5, P8_DU6], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[5, 1, 1]>,
	InstrItinData<IIC_VecGeneral , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[2, 1, 1]>,
	InstrItinData<IIC_VecVSL , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[2, 1, 1]>,
	InstrItinData<IIC_VecVSR , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[2, 1, 1]>,
	InstrItinData<IIC_VecFP , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[6, 1, 1]>,
	InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[6, 1, 1]>,
	InstrItinData<IIC_VecFPRound , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[6, 1, 1]>,
	InstrItinData<IIC_VecComplex , [InstrStage<1, [P8_DU1], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[7, 1, 1]>,
	InstrItinData<IIC_VecPerm , [InstrStage<1, [P8_DU1, P8_DU2], 0>,
	InstrStage<1, [P8_FPU1, P8_FPU2]>],
	[3, 1, 1]>
	]>;

	// ===---------------------------------------------------------------------===//
	// P8 machine model for scheduling and other instruction cost heuristics.
	// P8 has an 8 insn dispatch group (6 non-branch, 2 branch) and can issue up
	// to 10 insns per cycle (2-LU, 2-LSU, 2-FXU, 2-FPU, 1-CRU, 1-BRU).

	def P8Model : SchedMachineModel {			def P8Model : SchedMachineModel {
				shchenzUnsubmitted Done Reply Inline Actions This seems like resources for issue not dispatch? shchenz: This seems like resources for issue not dispatch?
	let IssueWidth = 8; // up to 8 instructions dispatched per cycle.			let IssueWidth = 8;
	// up to six non-branch instructions.			let LoadLatency = 3;
	// up to two branches in a dispatch group.

	let LoadLatency = 3; // Optimistic load latency assuming bypass.
	// This is overridden by OperandCycles if the
	// Itineraries are queried instead.
	let MispredictPenalty = 16;			let MispredictPenalty = 16;

	// Try to make sure we have at least 10 dispatch groups in a loop.
	let LoopMicroOpBufferSize = 60;			let LoopMicroOpBufferSize = 60;
				let MicroOpBufferSize = 64;
				shchenzUnsubmitted Done Reply Inline Actions Why is `MicroOpBufferSize` 60? The pwr8 UM seems to show this value as 32 * 2. shchenz: Why is `MicroOpBufferSize` 60? The pwr8 UM seems to show this value as 32 * 2.
				// TODO: Due to limitation of instruction definitions, non-P8 instructions
				shchenzUnsubmitted Done Reply Inline Actions We need to comment here why on pwr8 this field is set as 0. shchenz: We need to comment here why on pwr8 this field is set as 0.
				// are required to be listed here. Change this after it got fixed.
	let CompleteModel = 0;			let CompleteModel = 0;
				let UnsupportedFeatures = [HasSPE, PrefixInstrs, MMA,
				PairedVectorMemops, PCRelativeMemops,
				IsISA3_0, IsISA3_1, IsISAFuture];
				}

	let Itineraries = P8Itineraries;			let SchedModel = P8Model in {
				// Power8 Pipeline Units:
				shchenzUnsubmitted Done Reply Inline Actions Don't know how do you arrange these instructions. But from the UM: `vaddfp` pipeline is `FPU` that should be using `P8_FP_4x32` and the execution unit should be `P8_FPU`? shchenz: Don't know how do you arrange these instructions. But from the UM: `vaddfp` pipeline is `FPU`…

				// Nested resource groups: P8_LS is a sub-group of P8_LU_LS, which is in
				// turn a sub-group of P8_LU_LS_FX.
				def P8_LU_LS_FX : ProcResource<6>;
				def P8_LU_LS : ProcResource<4> {
				  let Super = P8_LU_LS_FX;
				}
				def P8_LS : ProcResource<2> {
				  let Super = P8_LU_LS;
				}
				shchenzUnsubmitted Not Done Reply Inline Actions Sorry, this still confuses me. The super is `P8_FPU` and `P8_FPU` has 4 execution units which can be executed as 2 way SIMD operation for double. So why we set the ProcResource value to 4 here? shchenz: Sorry, this still confuses me. The super is `P8_FPU` and `P8_FPU` has 4 execution units which…
				qiucfAuthorUnsubmitted Done Reply Inline Actions Thanks, updated. qiucf: Thanks, updated.
				// Two-unit LU group, a sub-group of P8_LU_LS.
				def P8_LU : ProcResource<2> {
				  let Super = P8_LU_LS;
				}
				shchenzUnsubmitted Not Done Reply Inline Actions Setting `P8_VMX` as super of "4xSingle" also seems weird. In ISA of pwr8, I think most instructions that handle 4xSingle type are VSX related and from the UM "10.3.2 Instructional Latencies and Throughputs", I saw most of them are using pipeline `FPU`. So I guess, we should use `P8_FPU` as Super instead? shchenz: Setting `P8_VMX` as super of "4xSingle" also seems weird. In ISA of pwr8, I think most…
				qiucfAuthorUnsubmitted Done Reply Inline Actions 2.1.3 Speculative Superscalar Inner Core Organization: Two VMX execution units capable of executing simple FX, permute, complex FX, and 4-way SIMD single-precision floating-point operations I think the reason is before VSX, Altivec already had instructions for 4xSingle vectors and they are implemented within VMX units. So their VSX equivalents uses the same execution units. qiucf: > 2.1.3 Speculative Superscalar Inner Core Organization: > > - Two VMX execution units…
				shchenzUnsubmitted Done Reply Inline Actions I believe we should use `P8_VSX` as super of `P8_FP_4x32`. According to the instructions which uses `P8_FP_4x32`, all of them are with pipeline `FPU` that should be for the 4 FPU units. Or we can create two P8_FP_4x32 resources, one is child of `P8_FPU` and one is child of `P8_VMX`. However the child of `P8_VMX` seems have no use instructions in the "Instruction Latencies and Throughputs" sheet. shchenz: I believe we should use `P8_VSX` as super of `P8_FP_4x32`. According to the instructions which…
				// Two-unit fixed-point group, a sub-group of P8_LU_LS_FX.
				def P8_FX : ProcResource<2> {
				  let Super = P8_LU_LS_FX;
				}
				// Single-unit resources; only P8_BR overrides BufferSize.
				def P8_DFU : ProcResource<1>;
				def P8_BR : ProcResource<1> {
				  let BufferSize = 16;
				}
				def P8_CY : ProcResource<1>;
				def P8_CRL : ProcResource<1>;
				// Two-unit VMX group.
				def P8_VMX : ProcResource<2>;
				def P8_PM : ProcResource<2> {
				// This is a workaround to make the scheduler respect the latency of long permute chains.
				let BufferSize = 1;
				let Super = P8_VMX;
	}			}
				// P8_XS and P8_VX are two-unit sub-groups of P8_VMX.
				let Super = P8_VMX in {
				  def P8_XS : ProcResource<2>;
				  def P8_VX : ProcResource<2>;
				}
				// Four-unit FPU group.
				def P8_FPU : ProcResource<4>;
				shchenzUnsubmitted Done Reply Inline Actions minor: _1, _2, _4 does not sound like a good name... shchenz: minor: _1, _2, _4 does not sound like a good name...
				// Units for scalar, 2xDouble and 4xSingle
				shchenzUnsubmitted Not Done Reply Inline Actions Why this unit number for type "2xdouble" is set to 4? Its parent `P8_VMX` only has 2. shchenz: Why this unit number for type "2xdouble" is set to 4? Its parent `P8_VMX` only has 2.
				qiucfAuthorUnsubmitted Done Reply Inline Actions Its parent `P8_FPU` has 4 units. qiucf: Its parent `P8_FPU` has 4 units.
				// Scalar, 2x64 and 4x32 floating-point groups: two units each, all
				// sub-groups of P8_FPU.
				let Super = P8_FPU in {
				  def P8_FP_Scal : ProcResource<2>;
				  def P8_FP_2x64 : ProcResource<2>;
				  def P8_FP_4x32 : ProcResource<2>;
				}

				// Power8 Dispatch Ports:
				// Two ports to do loads or fixed-point operations.
				// Two ports to do stores, fixed-point loads, or fixed-point operations.
				// Two ports for fixed-point operations.
				// Two issue ports shared by 2 FP/2 VSX/2 VMX/1 CY/1 DFP operations.
				// One for branch operations.
				// One for condition register operations.

				// TODO: Model dispatch of cracked instructions.
				shchenzUnsubmitted Done Reply Inline Actions Same with the name issue _1, ..._6. shchenz: Same with the name issue _1, ..._6.

				// All six ports that can take fixed-point operations.
				def P8_PORT_ALLFX : ProcResource<6>;
				// The four of those ports that can also take fixed-point loads.
				def P8_PORT_FXLD : ProcResource<4> {
				  let Super = P8_PORT_ALLFX;
				}
				// The two ports for loads or fixed-point operations.
				def P8_PORT_LD_FX : ProcResource<2> {
				  let Super = P8_PORT_FXLD;
				}
				shchenzUnsubmitted Done Reply Inline Actions nit: This issue port is not used? shchenz: nit: This issue port is not used?
				// The two ports for stores, fixed-point loads, or fixed-point operations.
				def P8_PORT_ST_FXLD_FX : ProcResource<2> {
				  let Super = P8_PORT_FXLD;
				}
				// The two issue ports shared by floating-point (two), VSX (two),
				// VMX (two), crypto (one) and DFP (one) operations.
				def P8_PORT_VMX_FP : ProcResource<2>;
				// The single branch port.
				def P8_PORT_BR : ProcResource<1>;
				// The single condition-register port.
				def P8_PORT_CR : ProcResource<1>;

				// Issue-port writes: each one consumes a single port resource and leaves
				// every other SchedWriteRes field at its default.
				def P8_ISSUE_FX   : SchedWriteRes<[P8_PORT_ALLFX]>;
				def P8_ISSUE_FXLD : SchedWriteRes<[P8_PORT_FXLD]>;
				def P8_ISSUE_LD   : SchedWriteRes<[P8_PORT_LD_FX]>;
				def P8_ISSUE_ST   : SchedWriteRes<[P8_PORT_ST_FXLD_FX]>;
				def P8_ISSUE_VSX  : SchedWriteRes<[P8_PORT_VMX_FP]>;
				def P8_ISSUE_BR   : SchedWriteRes<[P8_PORT_BR]>;
				def P8_ISSUE_CR   : SchedWriteRes<[P8_PORT_CR]>;

				// Power8 Instruction Latency & Port Groups:

				shchenzUnsubmitted Done Reply Inline Actions should be `def P8_LS_FP : SchedWriteRes<[P8_LSU, P8_FPU]>;`? shchenz: should be `def P8_LS_FP : SchedWriteRes<[P8_LSU, P8_FPU]>;`?
				// Write resource listing both P8_LU and P8_LS; no Latency override.
				def P8_LS_LU_NONE : SchedWriteRes<[P8_LU, P8_LS]>;
				shchenzUnsubmitted Not Done Reply Inline Actions An instruction like `STB` seems to occupy two pipelines, `LSU` and `LU`, while here it seems to occupy only the `LSU` pipeline? shchenz: An instruction like `STB` seems to occupy two pipelines, `LSU` and `LU`, while here it seems to occupy only…
				qiucfAuthorUnsubmitted Done Reply Inline Actions It occupies both: def P8_LS_LU : SchedWriteRes<[P8_LU, P8_LS]>; qiucf: It occupies both: ``` def P8_LS_LU : SchedWriteRes<[P8_LU, P8_LS]>; ```
				// Write resources over the LS / LU / FX groups. The *_NONE entries do
				// not override Latency; the others set it to the value in their name.
				def P8_LS_FP_NONE : SchedWriteRes<[P8_LS, P8_FPU]>;
				let Latency = 3 in def P8_LU_or_LS_3C : SchedWriteRes<[P8_LU_LS]>;
				let Latency = 3 in def P8_LS_FX_3C : SchedWriteRes<[P8_LS, P8_FX]>;
				let Latency = 2 in def P8_LU_or_LS_or_FX_2C : SchedWriteRes<[P8_LU_LS_FX]>;
				let Latency = 3 in def P8_LU_or_LS_FX_3C : SchedWriteRes<[P8_LU_LS, P8_FX]>;
				def P8_FX_NONE : SchedWriteRes<[P8_FX]>;
				let Latency = 1 in def P8_FX_1C : SchedWriteRes<[P8_FX]>;
				let Latency = 2 in def P8_FX_2C : SchedWriteRes<[P8_FX]>;
				let Latency = 3 in def P8_FX_3C : SchedWriteRes<[P8_FX]>;
				let Latency = 5 in def P8_FX_5C : SchedWriteRes<[P8_FX]>;
				let Latency = 10 in def P8_FX_10C : SchedWriteRes<[P8_FX]>;
				let Latency = 23 in def P8_FX_23C : SchedWriteRes<[P8_FX]>;
				let Latency = 15 in def P8_FX_15C : SchedWriteRes<[P8_FX]>;
				let Latency = 41 in def P8_FX_41C : SchedWriteRes<[P8_FX]>;
				// Branch, condition-register, load and store/FP write resources.
				// P8_CR_NONE does not override Latency.
				let Latency = 2 in def P8_BR_2C : SchedWriteRes<[P8_BR]>;
				def P8_CR_NONE : SchedWriteRes<[P8_CRL]>;
				let Latency = 3 in def P8_CR_3C : SchedWriteRes<[P8_CRL]>;
				let Latency = 5 in def P8_CR_5C : SchedWriteRes<[P8_CRL]>;
				let Latency = 5 in def P8_LU_5C : SchedWriteRes<[P8_LU]>;
				let Latency = 5 in def P8_LU_FX_5C : SchedWriteRes<[P8_LU, P8_FX]>;
				let Latency = 2 in def P8_LS_FP_FX_2C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]>;
				let Latency = 3 in def P8_LS_FP_FX_3C : SchedWriteRes<[P8_LS, P8_FPU, P8_FX]>;
				let Latency = 3 in def P8_LS_3C : SchedWriteRes<[P8_LS]>;
				let Latency = 3 in def P8_FP_3C : SchedWriteRes<[P8_FPU]>;
				// Floating-point write resources on the scalar, 4x32 and 2x64 groups.
				let Latency = 6 in def P8_FP_Scal_6C : SchedWriteRes<[P8_FP_Scal]>;
				let Latency = 6 in def P8_FP_4x32_6C : SchedWriteRes<[P8_FP_4x32]>;
				let Latency = 6 in def P8_FP_2x64_6C : SchedWriteRes<[P8_FP_2x64]>;
				let Latency = 26 in def P8_FP_26C : SchedWriteRes<[P8_FP_Scal]>;
				let Latency = 28 in def P8_FP_28C : SchedWriteRes<[P8_FP_4x32]>;
				let Latency = 31 in def P8_FP_31C : SchedWriteRes<[P8_FP_Scal]>;
				let Latency = 32 in def P8_FP_Scal_32C : SchedWriteRes<[P8_FP_Scal]>;
				let Latency = 32 in def P8_FP_2x64_32C : SchedWriteRes<[P8_FP_2x64]>;
				let Latency = 32 in def P8_FP_4x32_32C : SchedWriteRes<[P8_FP_4x32]>;
				let Latency = 43 in def P8_FP_Scal_43C : SchedWriteRes<[P8_FP_Scal]>;
				let Latency = 43 in def P8_FP_2x64_43C : SchedWriteRes<[P8_FP_2x64]>;
				// Write resources on the XS, PM, VX and CY groups.
				let Latency = 2 in def P8_XS_2C : SchedWriteRes<[P8_XS]>;
				let Latency = 2 in def P8_PM_2C : SchedWriteRes<[P8_PM]>;
				let Latency = 4 in def P8_XS_4C : SchedWriteRes<[P8_XS]>;
				let Latency = 7 in def P8_VX_7C : SchedWriteRes<[P8_VX]>;
				let Latency = 9 in def P8_XS_9C : SchedWriteRes<[P8_XS]>;
				let Latency = 6 in def P8_CY_6C : SchedWriteRes<[P8_CY]>;
				// Write resources on the P8_DFU unit, one per latency.
				let Latency = 13 in def P8_DFU_13C : SchedWriteRes<[P8_DFU]>;
				let Latency = 15 in def P8_DFU_15C : SchedWriteRes<[P8_DFU]>;
				let Latency = 17 in def P8_DFU_17C : SchedWriteRes<[P8_DFU]>;
				let Latency = 25 in def P8_DFU_25C : SchedWriteRes<[P8_DFU]>;
				let Latency = 32 in def P8_DFU_32C : SchedWriteRes<[P8_DFU]>;
				let Latency = 34 in def P8_DFU_34C : SchedWriteRes<[P8_DFU]>;
				let Latency = 40 in def P8_DFU_40C : SchedWriteRes<[P8_DFU]>;
				let Latency = 90 in def P8_DFU_90C : SchedWriteRes<[P8_DFU]>;
				let Latency = 96 in def P8_DFU_96C : SchedWriteRes<[P8_DFU]>;
				let Latency = 172 in def P8_DFU_172C : SchedWriteRes<[P8_DFU]>;
				// Direct moves: latency only, with an empty resource list.
				let Latency = 5 in def P8_DM_5C : SchedWriteRes<[]>;

				// Instructions of CR pipeline
				shchenzUnsubmitted Not Done Reply Inline Actions I really like the idea about group the instructions by the pipeline. It will be much easier for later change for this model. shchenz: I really like the idea about group the instructions by the pipeline. It will be much easier for…

				// CR-unit instructions, all issued through the CR port.
				def : InstRW<[P8_CR_NONE, P8_ISSUE_CR],
				             (instrs MFCR, MFCR8)>;
				def : InstRW<[P8_CR_3C, P8_ISSUE_CR],
				             (instrs MFOCRF, MFOCRF8)>;
				def : InstRW<[P8_CR_5C, P8_ISSUE_CR],
				             (instrs MFLR, MFLR8, MFCTR, MFCTR8)>;

				// Instructions of CY pipeline

				// Cipher, polynomial-sum and S-box instructions on the CY unit.
				def : InstRW<[P8_CY_6C, P8_ISSUE_VSX],
				             (instrs VCIPHER, VCIPHERLAST, VNCIPHER, VNCIPHERLAST,
				                     VPMSUMB, VPMSUMD, VPMSUMH, VPMSUMW, VSBOX)>;

				// Instructions of FPU pipeline

				// Floating-point divide and square-root instructions, one entry per
				// latency / FP group, all issued through the VSX port.
				def : InstRW<[P8_FP_26C, P8_ISSUE_VSX],
				             (instrs (instregex "^FDIVS(_rec)?$"), XSDIVSP)>;
				def : InstRW<[P8_FP_28C, P8_ISSUE_VSX],
				             (instrs XVDIVSP)>;
				def : InstRW<[P8_FP_31C, P8_ISSUE_VSX],
				             (instregex "^FSQRTS(_rec)?$")>;
				def : InstRW<[P8_FP_Scal_32C, P8_ISSUE_VSX],
				             (instrs FDIV, FDIV_rec, XSDIVDP)>;
				def : InstRW<[P8_FP_2x64_32C, P8_ISSUE_VSX],
				             (instrs XVDIVDP)>;
				def : InstRW<[P8_FP_4x32_32C, P8_ISSUE_VSX],
				             (instrs XVSQRTSP)>;
				def : InstRW<[P8_FP_Scal_43C, P8_ISSUE_VSX],
				             (instrs FSQRT, FSQRT_rec, XSSQRTDP)>;
				def : InstRW<[P8_FP_2x64_43C, P8_ISSUE_VSX],
				             (instrs XVSQRTDP)>;

				// MTFS* instructions: P8_FP_3C on the FPU group, VSX issue port.
				def : InstRW<[P8_FP_3C, P8_ISSUE_VSX],
				             (instrs MTFSFI_rec, MTFSF_rec, MTFSFI, MTFSFIb,
				                     MTFSF, MTFSFb, MTFSB0, MTFSB1)>;

				def : InstRW<[P8_FP_Scal_6C, P8_ISSUE_VSX], (instrs
				shchenzUnsubmitted Not Done Reply Inline Actions For cracked instructions, should we only use one issue port? shchenz: For cracked instructions, should we only use one issue port?
				qiucfAuthorUnsubmitted Done Reply Inline Actions I tried modelling cracked instructions, but not got good performance results, which can be future improvement item. I added a `TODO` note. qiucf: I tried modelling cracked instructions, but not got good performance results, which can be…
				(instregex "^F(N)?M(ADD\|SUB)(S)?(_rec)?$"),
				(instregex "^XS(N)?M(ADD\|SUB)(A\|M)(D\|S)P$"),
				shchenzUnsubmitted Done Reply Inline Actions ADD_rec/SUBF_rec/NEG_rec seems can use both FXU or LSU? shchenz: ADD_rec/SUBF_rec/NEG_rec seems can use both FXU or LSU?
				qiucfAuthorUnsubmitted Done Reply Inline Actions Thanks, moved to use only FX instead. qiucf: Thanks, moved to use only FX instead.
				(instregex "^FC(F\|T)I(D\|W)(U)?(S\|Z)?(_rec)?$"),
				(instregex "^(F\|XS)(ABS\|CPSGN\|ADD\|MUL\|NABS\|RE\|NEG\|SUB\|SEL\|RSQRTE)(D\|S)?(P)?(s)?(_rec)?$"),
				(instregex "^FRI(M\|N\|P\|Z)(D\|S)(_rec)?$"),
				(instregex "^XSCVDP(S\|U)X(W\|D)S(s)?$"),
				(instregex "^XSCV(S\|U)XD(D\|S)P$"),
				(instregex "^XSCV(D\|S)P(S\|D)P(N)?$"),
				(instregex "^XSRDPI(C\|M\|P\|Z)?$"),
				FMR, FRSP, FMR_rec, FRSP_rec, XSRSP)>;

				// 6-cycle instructions on the 4x32 FP group, issued through the VSX port.
				// Regex alternation is written as a plain '|': a backslash before '|' is
				// not a valid escape in a TableGen string literal.
				def : InstRW<[P8_FP_4x32_6C, P8_ISSUE_VSX], (instrs
				(instregex "^XV(N)?M(ADD|SUB)(A|M)SP$"),
				(instregex "^VRFI(M|N|P|Z)$"),
				XVRSQRTESP, XVSUBSP, VADDFP, VEXPTEFP, VLOGEFP, VMADDFP, VNMSUBFP, VREFP,
				VRSQRTEFP, VSUBFP, XVCVSXWSP, XVCVUXWSP, XVMULSP, XVNABSSP, XVNEGSP, XVRESP,
				XVCVDPSP, XVCVSXDSP, XVCVUXDSP, XVABSSP, XVADDSP, XVCPSGNSP)>;

				// 6-cycle instructions on the 2x64 FP group, issued through the VSX port.
				// Regex alternation is written as a plain '|': a backslash before '|' is
				// not a valid escape in a TableGen string literal.
				def : InstRW<[P8_FP_2x64_6C, P8_ISSUE_VSX], (instrs
				(instregex "^XVR(D|S)PI(C|M|P|Z)?$"),
				(instregex "^XVCV(S|U)X(D|W)DP$"),
				(instregex "^XVCV(D|W|S)P(S|U)X(D|W)S$"),
				(instregex "^XV(N)?(M)?(RSQRTE|CPSGN|SUB|ADD|ABS|UL|NEG|RE)(A|M)?DP$"),
				XVCVSPDP)>;

				// Instructions of FX, LU or LS pipeline

				// Trap and move-to-CR instructions: FX unit, no latency override.
				def : InstRW<[P8_FX_NONE, P8_ISSUE_FX],
				             (instrs TDI, TWI, TD, TW,
				                     MTCRF, MTCRF8, MTOCRF, MTOCRF8)>;
				// RLWIMI / RLWIMI8: FX unit, 1 cycle.
				def : InstRW<[P8_FX_1C, P8_ISSUE_FX],
				             (instregex "^RLWIMI(8)?$")>;
				// 2-cycle FX instructions: logical, sign-extend, rotate, shift, and the
				// overflow / carry / record forms of add, subtract and negate.
				// Regex alternation is written as a plain '|': a backslash before '|' is
				// not a valid escape in a TableGen string literal.
				// TODO: Pipeline of logical instructions might be LS or FX
				def : InstRW<[P8_FX_2C, P8_ISSUE_FX], (instrs
				(instregex "^(N|X)?(EQV|AND|OR)(I)?(S|C)?(8)?(_rec)?$"),
				(instregex "^EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
				(instregex "^RL(D|W)(I)?(NM|C)(L|R)?(8)?(_32)?(_64)?(_rec)?$"),
				(instregex "^S(L|R)(A)?(W|D)(I)?(8)?(_rec|_32)?$"),
				(instregex "^(ADD|SUBF)(M|Z)?(C|E)?(4|8)?O(_rec)?$"),
				(instregex "^(ADD|SUBF)(M|Z)?E(8)?_rec$"),
				(instregex "^(ADD|SUBF|NEG)(4|8)?_rec$"),
				NOP, ADDG6S, ADDG6S8, ADDZE, ADDZE8, ADDIC_rec, NEGO_rec, ADDC, ADDC8, SUBFC, SUBFC8,
				ADDC_rec, ADDC8_rec, SUBFC_rec, SUBFC8_rec, COPY, NEG8O_rec,
				RLDIMI, RLDIMI_rec, RLWIMI8_rec, RLWIMI_rec)>;
				shchenzUnsubmitted Not Done Reply Inline Actions hmm, this change seems still not accurate, below is from PWR8 UM(17 March 2016) addi addis add add. subf subf. addic subfic adde addme subfme subfze neg neg. nego FXU (or LU or LSU for non-dot forms) `add.` should be able to use FXU or LU (4 hardware units). Maybe we need to define a new unit group like `P8_FX_LU` for this kind of instructions? I suggest you check other instructions as well compared with the UM. Thanks very much for the big effort. shchenz: hmm, this change seems still not accurate, below is from PWR8 UM(17 March 2016) ``` addi addis…
				qiucfAuthorUnsubmitted Done Reply Inline Actions Per my understanding, only non-dot forms use LU/LSU? qiucf: Per my understanding, only non-dot forms use LU/LSU?
				shchenzUnsubmitted Not Done Reply Inline Actions OK, so we have different understanding about `FXU (or LU or LSU for non-dot forms)`. Let us use yours for now until we find a clear doc about what units dot form instructions can use. shchenz: OK, so we have different understanding about `FXU (or LU or LSU for non-dot forms)`. Let us…

				// Count-leading-zeros and population-count instructions: FX unit,
				// 3 cycles. P8_ISSUE_FX is listed so these consume a fixed-point issue
				// port like the other FX-unit entries in this section. The regex uses a
				// plain '|'; a backslash before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_FX_3C, P8_ISSUE_FX], (instregex "^(POP)?CNT(LZ)?(B|W|D)(8)?(_rec)?$")>;
				// 5-cycle FX instructions: multiplies, integer compares, isel, and moves
				// to LR / CTR. The regex uses a plain '|'; a backslash before '|' is not
				// a valid TableGen string escape.
				def : InstRW<[P8_FX_5C, P8_ISSUE_FX], (instrs
				(instregex "^MUL(H|L)(I|W|D)(8)?(U|O)?(_rec)?$"),
				CMPDI,CMPWI,CMPD,CMPW,CMPLDI,CMPLWI,CMPLD,CMPLW,
				ISEL, ISEL8, MTLR, MTLR8, MTCTR, MTCTR8, MTCTR8loop, MTCTRloop)>;

				// MFTB / MFTB8: FX unit, 10 cycles. The write uses the fixed-point unit,
				// so it is paired with the fixed-point issue port rather than the
				// VMX/FP port.
				def : InstRW<[P8_FX_10C, P8_ISSUE_FX], (instregex "^MFTB(8)?$")>;
				// DIVW / DIVWU: FX unit, 15 cycles.
				def : InstRW<[P8_FX_15C, P8_ISSUE_FX], (instregex "^DIVW(U)?$")>;

				// DIVD / DIVDU / DIVWE / DIVWEU: FX unit, 23 cycles. The regex uses a
				// plain '|'; a backslash before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_FX_23C, P8_ISSUE_FX], (instregex "^DIV(D|WE)(U)?$")>;
				def : InstRW<[P8_FX_41C], (instrs
				(instregex "^DIV(D\|W)(E)?(U)?O(_rec)?$"),
				(instregex "^DIV(D\|W)(E)?(U)?_rec$"),
				shchenzUnsubmitted Not Done Reply Inline Actions Seems I can not find `mflr`, is it expected? shchenz: Seems I can not find `mflr`, is it expected?
				DIVDE, DIVDEU)>;

				// MFSR / MFSRIN: LS unit, 3 cycles, fixed-point issue port.
				def : InstRW<[P8_LS_3C, P8_ISSUE_FX],
				             (instrs MFSR, MFSRIN)>;

				// Floating-point and vector loads: LU unit, 5 cycles, load issue port.
				// LXVD2X appears once (the original list named it twice).
				def : InstRW<[P8_LU_5C, P8_ISSUE_LD], (instrs
				LFS, LFSX, LFD, LFDX, LFDXTLS, LFDXTLS_, LXVD2X, LXVW4X, LXVDSX, LVEBX, LVEHX, LVEWX,
				LVX, LVXL, LXSDX, LFIWAX, LFIWZX, LFSXTLS, LFSXTLS_, LXVB16X, LXSIWZX,
				DFLOADf64, XFLOADf64, LIWZX)>;

				// LQ: LS + FX units, 3 cycles.
				def : InstRW<[P8_LS_FX_3C, P8_ISSUE_FXLD], (instrs LQ)>;
				// Update-form FP loads (LFDU, LFSU and their X forms): LU + FX units,
				// 5 cycles. The regex uses a plain '|'; a backslash before '|' is not a
				// valid TableGen string escape.
				def : InstRW<[P8_LU_FX_5C, P8_ISSUE_LD], (instregex "^LF(D|S)U(X)?$")>;

				// FP / VSX stores: LS + FPU resources, store issue port.
				def : InstRW<[P8_LS_FP_NONE, P8_ISSUE_ST],
				             (instrs STXSDX, STXVD2X, STXVW4X, STFIWX,
				                     STFS, STFSX, STFD, STFDX,
				                     STFDEPX, STFDXTLS, STFDXTLS_, STFSXTLS, STFSXTLS_,
				                     STXSIWX, STXSSP, STXSSPX)>;

				// Vector stores: LS + FPU + FX resources, 2 cycles.
				def : InstRW<[P8_LS_FP_FX_2C, P8_ISSUE_ST], (instrs STVEBX, STVEHX, STVEWX, STVX, STVXL)>;
				// Update-form FP stores: LS + FPU + FX resources, 3 cycles. The regex
				// uses a plain '|'; a backslash before '|' is not a valid TableGen
				// string escape.
				def : InstRW<[P8_LS_FP_FX_3C, P8_ISSUE_ST], (instregex "^STF(D|S)U(X)?$")>;

				def : InstRW<[P8_LS_LU_NONE, P8_ISSUE_ST], (instrs
				shchenzUnsubmitted Not Done Reply Inline Actions lswi lswx stswi stswx (unaligned) LSU,LU lwarx ldarx LSU or LU shchenz: ``` lswi lswx stswi stswx (unaligned) LSU,LU ``` ``` lwarx ldarx LSU or LU ```
				qiucfAuthorUnsubmitted Done Reply Inline Actions lswi (naturally aligned) uses LSU or LU lswi lswx stswi stswx (unaligned) uses LSU,LU stswi (naturally aligned) uses LSU, LU We can't differentiate whether it's aligned or not, assume naturally aligned here. qiucf: - lswi (naturally aligned) uses LSU or LU - lswi lswx stswi stswx (unaligned) uses LSU,LU…
				(instregex "^ST(B\|H\|W\|D)(U)?(X)?(8\|TLS)?(_)?(32)?$"),
				STBCIX, STBCX, STBEPX, STDBRX, STDCIX, STDCX, STHBRX, STHCIX, STHCX, STHEPX,
				STMW, STSWI, STWBRX, STWCIX, STWCX, STWEPX)>;

				// Integer byte/half/word/doubleword loads matched by name: LU-or-LS
				// group plus FX, 3 cycles. The regex uses a plain '|'; a backslash
				// before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_LU_or_LS_FX_3C, P8_ISSUE_FXLD],
				(instregex "^L(B|H|W|D)(A|Z)?(U)?(X)?(8|TLS)?(_)?(32)?$")>;

				// Remaining loads (reserve, byte-reversed, cache-inhibited, external-PID,
				// multiple/string, LVSL/LVSR): LU-or-LS group, 3 cycles.
				def : InstRW<[P8_LU_or_LS_3C, P8_ISSUE_FXLD],
				             (instrs LBARX, LBARXL, LBEPX, LBZCIX, LDARX, LDARXL, LDBRX,
				                     LDCIX, LFDEPX, LHARX, LHARXL, LHBRX, LXSIWAX,
				                     LHBRX8, LHEPX, LHZCIX, LMW, LSWI, LVSL, LVSR,
				                     LWARX, LWARXL, LWBRX, LWBRX8, LWEPX, LWZCIX)>;

				// Non-record add-immediate, load-immediate, add, subtract-from and negate
				// forms: any of the LU / LS / FX units, 2 cycles. The regexes use a plain
				// '|'; a backslash before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_LU_or_LS_or_FX_2C, P8_ISSUE_FX], (instrs
				(instregex "^ADDI(C)?(dtprel|tlsgd|toc)?(L)?(ADDR)?(32|8)?$"),
				(instregex "^ADDIS(dtprel|tlsgd|toc|gotTprel)?(HA)?(32|8)?$"),
				(instregex "^LI(S)?(8)?$"),
				(instregex "^ADD(M)?(E)?(4|8)?(TLS)?(_)?$"),
				(instregex "^SUBF(M|Z)?(E)?(IC)?(4|8)?$"),
				(instregex "^NEG(8)?(O)?$"))>;

				// Instructions of PM pipeline

				// Pack, unpack, splat, merge, permute, select and whole-vector shift
				// instructions: PM unit, 2 cycles, VSX issue port. The regexes use a
				// plain '|'; a backslash before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_PM_2C, P8_ISSUE_VSX], (instrs
				(instregex "^VPK(S|U)(H|W|D)(S|U)(M|S)$"),
				(instregex "^VUPK(H|L)(P|S)(H|B|W|X)$"),
				(instregex "^VSPLT(IS)?(B|H|W)(s)?$"),
				(instregex "^(XX|V)MRG(E|O|H|L)(B|H|W)$"),
				XXPERMDI, XXPERMDIs, XXSEL, XXSLDWI, XXSLDWIs, XXSPLTW, XXSPLTWs, VPERMXOR,
				VPKPX, VPERM, VBPERMQ, VGBBD, VSEL, VSL, VSLDOI, VSLO, VSR, VSRO)>;

				// Simple vector integer, logical, compare, min/max, count and VSCR-move
				// instructions: XS unit, 2 cycles, VSX issue port. The regexes use a
				// plain '|'; a backslash before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_XS_2C, P8_ISSUE_VSX], (instrs
				(instregex "^V(ADD|SUB)(S|U)(B|H|W|D)(M|S)$"),
				(instregex "^X(S|V)(MAX|MIN)(D|S)P$"),
				(instregex "^V(S)?(R)?(L)?(A)?(B|D|H|W)$"),
				(instregex "^VAVG(S|U)(B|H|W)$"),
				(instregex "^VM(AX|IN)(S|U)(B|H|W|D)$"),
				(instregex "^(XX|V)(L)?(N)?(X)?(AND|OR|EQV)(C)?$"),
				(instregex "^(X)?VCMP(EQ|GT|GE|B)(F|S|U)?(B|H|W|D|P|S)(P)?(_rec)?$"),
				(instregex "^VCLZ(B|H|W|D)$"),
				(instregex "^VPOPCNT(B|H|W)$"),
				XXLORf, XXLXORdpz, XXLXORspz, XXLXORz, VEQV, VMAXFP, VMINFP,
				VSHASIGMAD, VSHASIGMAW, VSUBCUW, VADDCUW, MFVSCR, MTVSCR)>;

				// Quadword add/subtract forms and VPOPCNTD: XS unit, 4 cycles. The regex
				// uses a plain '|'; a backslash before '|' is not a valid TableGen
				// string escape.
				def : InstRW<[P8_XS_4C, P8_ISSUE_VSX], (instrs
				(instregex "^V(ADD|SUB)(E)?(C)?UQ(M)?$"),
				VPOPCNTD)>;

				// FP compares and test-for-divide / test-for-sqrt: XS unit, 9 cycles.
				// Paired with the VMX/FP issue port like the other XS-unit entries; the
				// CR port is otherwise used only with the P8_CRL unit.
				// NOTE(review): confirm against the POWER8 manual that these do not
				// issue through the condition-register port.
				// The regexes use a plain '|'; a backslash before '|' is not a valid
				// TableGen string escape.
				def : InstRW<[P8_XS_9C, P8_ISSUE_VSX], (instrs
				(instregex "^(F|XS)CMP(O|U)(D|S)(P)?$"),
				(instregex "^(F|XS|XV)T(DIV|SQRT)((D|S)P)?$"))>;

				// Instructions of VX pipeline

				// Vector sum-across, multiply and multiply-add instructions: VX unit,
				// 7 cycles, VSX issue port. The regexes use a plain '|'; a backslash
				// before '|' is not a valid TableGen string escape.
				def : InstRW<[P8_VX_7C, P8_ISSUE_VSX], (instrs
				(instregex "^V(M)?SUM(2|4)?(M|S|U)(B|H|W)(M|S)$"),
				(instregex "^VMUL(E|O)?(S|U)(B|H|W)(M)?$"),
				VMHADDSHS, VMHRADDSHS, VMLADDUHM)>;

				// Instructions of BR pipeline

				// Branch opcodes bound to P8_BR_2C and issued through P8_ISSUE_BR.
				// The three patterns are intentionally broad, long chains of optional
				// name fragments, so that one regex covers a whole family:
				//   1. B / BC / BCC / BCTR / ... including the gB*, *_LD/_LWZ,
				//      always/into_toc/at, _RM and trailing-n variants
				//   2. BDZ / BDNZ decrement-and-branch names and their L/R/A/m/p/8 forms
				//   3. BL / BLR / BLA call names with 8, _NOP, _TLS, '_' and RM suffixes
				def : InstRW<[P8_BR_2C, P8_ISSUE_BR], (instrs
					(instregex "^(g)?B(C)?(C)?(CTR)?(L)?(A)?(R)?(L)?(8)?(_LD|_LWZ)?(always|into_toc|at)?(_RM)?(n)?$"),
					(instregex "^BD(N)?Z(L)?(R|A)?(L)?(m|p|8)?$"),
					(instregex "^BL(R|A)?(8)?(_NOP)?(_TLS)?(_)?(RM)?$"))>;

				// Instructions of DFP pipeline
				// DFP operations also use float/vector/crypto issue ports.
				// Decimal opcodes bound to P8_DFU_13C, the lowest-numbered DFU write
				// resource used in this section.
				//   DTST(D|S)(C|F|G)[Q] - test data class/group/significance names
				//   D(Q|X)EX[Q][_rec]   - DQEX/DXEX exponent names
				//   D(ADD|SUB|...)      - non-quad arithmetic, rounding and conversion
				//                         names, each with an optional _rec form
				//   DSC(L|R)I[_rec]     - DSCLI / DSCRI
				// BCD add/subtract and the remaining compares/tests are listed by name.
				def : InstRW<[P8_DFU_13C, P8_ISSUE_VSX], (instrs
					(instregex "^DTST(D|S)(C|F|G)(Q)?$"),
					(instregex "^D(Q|X)EX(Q)?(_rec)?$"),
					(instregex "^D(ADD|SUB|IEX|QUA|RRND|RINTX|RINTN|CTDP|DEDPD|ENBCD)(_rec)?$"),
					(instregex "^DSC(L|R)I(_rec)?$"),
					BCDADD_rec, BCDSUB_rec, DCMPO, DCMPU, DTSTEX, DQUAI)>;

				// Q-suffixed counterparts of the decimal compare, round, insert-exponent,
				// quantize-immediate, test, BCD-conversion and shift opcodes, bound to
				// P8_DFU_15C. The regex covers DRINTNQ/DRINTXQ and their _rec forms; all
				// other entries are spelled out, with _rec variants listed explicitly.
				def : InstRW<[P8_DFU_15C, P8_ISSUE_VSX], (instrs
					(instregex "^DRINT(N|X)Q(_rec)?$"),
					DCMPOQ, DCMPUQ,
					DRRNDQ, DRRNDQ_rec,
					DIEXQ, DIEXQ_rec,
					DQUAIQ, DQUAIQ_rec,
					DTSTEXQ,
					DDEDPDQ, DDEDPDQ_rec,
					DENBCDQ, DENBCDQ_rec,
					DSCLIQ, DSCLIQ_rec,
					DSCRIQ, DSCRIQ_rec,
					DCTQPQ, DCTQPQ_rec)>;

				// Remaining decimal opcodes, one entry per DFU write resource, listed in
				// increasing order of the cycle figure embedded in the resource name
				// (17C ... 172C). Every entry issues through P8_ISSUE_VSX. The first
				// entry matches DADDQ, DSUBQ and DQUAQ plus their _rec forms.
				def : InstRW<[P8_DFU_17C,  P8_ISSUE_VSX], (instregex "^D(ADD|SUB|QUA)Q(_rec)?$")>;
				def : InstRW<[P8_DFU_25C,  P8_ISSUE_VSX], (instrs DRSP, DRSP_rec, DCTFIX, DCTFIX_rec)>;
				def : InstRW<[P8_DFU_32C,  P8_ISSUE_VSX], (instrs DCFFIX, DCFFIX_rec)>;
				def : InstRW<[P8_DFU_34C,  P8_ISSUE_VSX], (instrs DCFFIXQ, DCFFIXQ_rec)>;
				def : InstRW<[P8_DFU_40C,  P8_ISSUE_VSX], (instrs DMUL, DMUL_rec)>;
				def : InstRW<[P8_DFU_90C,  P8_ISSUE_VSX], (instrs DMULQ, DMULQ_rec)>;
				def : InstRW<[P8_DFU_96C,  P8_ISSUE_VSX], (instrs DDIV, DDIV_rec)>;
				def : InstRW<[P8_DFU_172C, P8_ISSUE_VSX], (instrs DDIVQ, DDIVQ_rec)>;

				// Direct move instructions

				// All ten direct-move opcodes share P8_DM_5C and issue through
				// P8_ISSUE_VSX. Grouped by mnemonic prefix: MFV* first, then MTV*.
				def : InstRW<[P8_DM_5C, P8_ISSUE_VSX], (instrs
					// MFV* opcodes
					MFVRD, MFVSRD, MFVRWZ, MFVSRWZ,
					// MTV* opcodes
					MTVRD, MTVSRD,
					MTVRWA, MTVSRWA,
					MTVRWZ, MTVSRWZ)>;
				}

This is an archive of the discontinued LLVM Phabricator instance.

[PowerPC] Define SchedModel for Power8 — Closed, Public

Details

Diff Detail

Event Timeline

Large Diff

Revision Contents

Diff 556234

llvm/lib/Target/PowerPC/PPCScheduleP8.td

llvm/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll

llvm/test/CodeGen/PowerPC/BreakableToken-reduced.ll

llvm/test/CodeGen/PowerPC/CSR-fit.ll

llvm/test/CodeGen/PowerPC/CompareEliminationSpillIssue.ll

llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll

llvm/test/CodeGen/PowerPC/PR35812-neg-cmpxchg.ll

llvm/test/CodeGen/PowerPC/VSX-XForm-Scalars.ll

llvm/test/CodeGen/PowerPC/aix-dfltabi-rsrvd-reg.ll

llvm/test/CodeGen/PowerPC/aix-vsx-splatimm.ll

llvm/test/CodeGen/PowerPC/aix32-p8-scalar_vector_conversions.ll

llvm/test/CodeGen/PowerPC/aix_scalar_vector_permuted.ll

llvm/test/CodeGen/PowerPC/all-atomics.ll

llvm/test/CodeGen/PowerPC/and-extend-combine.ll

llvm/test/CodeGen/PowerPC/asm-template-I.ll

llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll

llvm/test/CodeGen/PowerPC/atomics-i128.ll

llvm/test/CodeGen/PowerPC/atomics-i16-ldst.ll

llvm/test/CodeGen/PowerPC/atomics-i32-ldst.ll

llvm/test/CodeGen/PowerPC/atomics-i64-ldst.ll

llvm/test/CodeGen/PowerPC/atomics-i8-ldst.ll

llvm/test/CodeGen/PowerPC/atomics-regression.ll

llvm/test/CodeGen/PowerPC/bool-math.ll

llvm/test/CodeGen/PowerPC/branch_coalesce.ll

llvm/test/CodeGen/PowerPC/build-vector-tests.ll

llvm/test/CodeGen/PowerPC/builtins-ppc-p8vector.ll

llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll

llvm/test/CodeGen/PowerPC/cfence-float.ll

llvm/test/CodeGen/PowerPC/coldcc2.ll

llvm/test/CodeGen/PowerPC/combine-fneg.ll

llvm/test/CodeGen/PowerPC/combine-sext-and-shl-after-isel.ll

llvm/test/CodeGen/PowerPC/combine_ext_trunc.ll

llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll

llvm/test/CodeGen/PowerPC/const-splat-array-init.ll

llvm/test/CodeGen/PowerPC/constant-combines.ll

llvm/test/CodeGen/PowerPC/crypto_bifs_be.ll

llvm/test/CodeGen/PowerPC/csr-split.ll

llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll

llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll

llvm/test/CodeGen/PowerPC/cxx_tlscc64.ll

llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll

llvm/test/CodeGen/PowerPC/elf64-byval-cc.ll

llvm/test/CodeGen/PowerPC/extra-toc-reg-deps.ll

llvm/test/CodeGen/PowerPC/extract-and-store.ll

llvm/test/CodeGen/PowerPC/f128-aggregates.ll

llvm/test/CodeGen/PowerPC/f128-arith.ll

llvm/test/CodeGen/PowerPC/f128-bitcast.ll

llvm/test/CodeGen/PowerPC/f128-compare.ll

llvm/test/CodeGen/PowerPC/f128-conv.ll

llvm/test/CodeGen/PowerPC/f128-fma.ll

llvm/test/CodeGen/PowerPC/f128-passByValue.ll

llvm/test/CodeGen/PowerPC/f128-truncateNconv.ll

llvm/test/CodeGen/PowerPC/fma-aggr-FMF.ll

llvm/test/CodeGen/PowerPC/fma-combine.ll

llvm/test/CodeGen/PowerPC/fmf-propagation.ll

llvm/test/CodeGen/PowerPC/fold-rlwinm-1.ll

llvm/test/CodeGen/PowerPC/fp-classify.ll

llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll

llvm/test/CodeGen/PowerPC/fp-strict-conv.ll

llvm/test/CodeGen/PowerPC/fp-strict-fcmp.ll

llvm/test/CodeGen/PowerPC/fp-strict-round.ll

llvm/test/CodeGen/PowerPC/fp-strict.ll

llvm/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll

llvm/test/CodeGen/PowerPC/fpscr-intrinsics.ll

llvm/test/CodeGen/PowerPC/frounds.ll

llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll

llvm/test/CodeGen/PowerPC/funnel-shift.ll

llvm/test/CodeGen/PowerPC/handle-f16-storage-type.ll

llvm/test/CodeGen/PowerPC/huge-frame-call.ll

llvm/test/CodeGen/PowerPC/huge-frame-size.ll

llvm/test/CodeGen/PowerPC/int128_ldst.ll

llvm/test/CodeGen/PowerPC/legalize-vaarg.ll

[PowerPC] Define SchedModel for Power8
ClosedPublic