diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -1213,7 +1213,7 @@ def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>; // Load pair, immed post-index or immed pre-index, signed words -def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr], +def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi], (instregex "^LDPSW(post|pre)$")>; // Store instructions @@ -1224,7 +1224,7 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; -def : SchedAlias; // copied from A57. +def : SchedAlias; // Tag load instructions // ----------------------------------------------------------------------------- @@ -1337,7 +1337,7 @@ // Load vector reg, immed post-index // Load vector reg, immed pre-index -def : InstRW<[V2Write_6cyc_1I_1L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ](pre|post)$")>; // Load vector reg, unsigned immed @@ -1359,12 +1359,12 @@ // Load vector pair, immed post-index, S/D-form // Load vector pair, immed pre-index, S/D-form -def : InstRW<[V2Write_6cyc_1I_1L, WriteLDHi, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi], (instregex "^LDP[SD](pre|post)$")>; // Load vector pair, immed post-index, Q-form // Load vector pair, immed pre-index, Q-form -def : InstRW<[V2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost, +def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost, LDPQpre)>; // FP store instructions @@ -1725,220 +1725,220 @@ // ASIMD load, 1 element, multiple, 1 reg, D-form def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_6cyc_1L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 1 reg, Q-form def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_1L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_6cyc_2L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, Q-form def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_2L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_6cyc_3L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, Q-form def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_3L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_7cyc_4L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, Q-form def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_7cyc_4L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, all lanes, Q-form def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>; // ASIMD load, 2 element, multiple, Q-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 2 element, all lanes, Q-form def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>; // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lane, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_9cyc_6L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D -def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; // ASIMD store instructions // ----------------------------------------------------------------------------- // ASIMD store, 1 element, multiple, 1 reg, D-form def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 1 reg, Q-form def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, Q-form def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, Q-form def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_3L01_3V01, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, Q-form def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_4L01_4V01, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)_POST$")>; // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)_POST$")>; // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_3L01_6V01, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[V2Write_6cyc_2L01_6V01, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; // ASIMD store, 4 element, multiple, Q-form, B/H/S def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>; -def : InstRW<[V2Write_7cyc_4L01_12V01, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; // ASIMD store, 4 element, multiple, Q-form, D def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>; -def : InstRW<[V2Write_5cyc_4L01_8V01, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H/S def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>; -def : InstRW<[V2Write_6cyc_1L01_3V01, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>; +def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)_POST$")>; // ASIMD store, 4 element, one lane, D def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>; -def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST4i(64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)_POST$")>; // Cryptography extensions // ----------------------------------------------------------------------------- diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-writeback.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-writeback.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-writeback.s @@ -0,0 +1,3979 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 --instruction-info=0 --resource-pressure=0 --timeline --timeline-max-iterations=1 < %s | FileCheck %s + +# LLVM-MCA-BEGIN G01 +ld1 { v1.1d }, [x27], #8 +ld1 { v1.2d }, [x27], #16 +ld1 { v1.2s }, [x27], #8 +ld1 { v1.4h }, [x27], #8 +ld1 { v1.4s }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G02 +ld1 { v1.8b }, [x27], #8 +ld1 { v1.8h }, [x27], #16 +ld1 { v1.16b }, [x27], #16 +ld1 { v1.1d }, [x27], x28 +ld1 { v1.2d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G03 +ld1 { v1.2s }, [x27], x28 +ld1 { v1.4h }, [x27], x28 +ld1 { v1.4s }, [x27], x28 +ld1 { v1.8b }, [x27], x28 +ld1 { v1.8h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G04 +ld1 { v1.16b }, [x27], x28 +ld1 { v1.1d, v2.1d }, [x27], #16 +ld1 { v1.2d, v2.2d }, [x27], #32 +ld1 { v1.2s, v2.2s }, [x27], #16 +ld1 { v1.4h, v2.4h }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G05 +ld1 { v1.4s, v2.4s }, [x27], #32 +ld1 { v1.8b, v2.8b }, [x27], #16 +ld1 { v1.8h, v2.8h }, [x27], #32 +ld1 { v1.16b, v2.16b }, [x27], #32 +ld1 { v1.1d, v2.1d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G06 +ld1 { v1.2d, v2.2d }, [x27], x28 +ld1 { v1.2s, v2.2s }, [x27], x28 +ld1 { v1.4h, v2.4h }, [x27], x28 +ld1 { v1.4s, v2.4s }, [x27], x28 +ld1 { v1.8b, v2.8b }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G07 +ld1 { v1.8h, v2.8h }, [x27], x28 +ld1 { v1.16b, v2.16b }, [x27], x28 +ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G08 +ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G09 +ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G10 +ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G11 +ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G12 +ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G13 +ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +ld1 { v1.b }[0], [x27], #1 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G14 +ld1 { v1.b }[8], [x27], #1 +ld1 { v1.b }[0], [x27], x28 +ld1 { v1.b }[8], [x27], x28 +ld1 { v1.h }[0], [x27], #2 +ld1 { v1.h }[4], [x27], #2 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G15 +ld1 { v1.h }[0], [x27], x28 +ld1 { v1.h }[4], [x27], x28 +ld1 { v1.s }[0], [x27], #4 +ld1 { v1.s }[0], [x27], x28 +ld1 { v1.d }[0], [x27], #8 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G16 +ld1 { v1.d }[0], [x27], x28 +ld1r { v1.1d }, [x27], #8 +ld1r { v1.2d }, [x27], #8 +ld1r { v1.2s }, [x27], #4 +ld1r { v1.4h }, [x27], #2 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G17 +ld1r { v1.4s }, [x27], #4 +ld1r { v1.8b }, [x27], #1 +ld1r { v1.8h }, [x27], #2 +ld1r { v1.16b }, [x27], #1 +ld1r { v1.1d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G18 +ld1r { v1.2d }, [x27], x28 +ld1r { v1.2s }, [x27], x28 +ld1r { v1.4h }, [x27], x28 +ld1r { v1.4s }, [x27], x28 +ld1r { v1.8b }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G19 +ld1r { v1.8h }, [x27], x28 +ld1r { v1.16b }, [x27], x28 +ld2 { v1.2d, v2.2d }, [x27], #32 +ld2 { v1.2s, v2.2s }, [x27], #16 +ld2 { v1.4h, v2.4h }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G20 +ld2 { v1.4s, v2.4s }, [x27], #32 +ld2 { v1.8b, v2.8b }, [x27], #16 +ld2 { v1.8h, v2.8h }, [x27], #32 +ld2 { v1.16b, v2.16b }, [x27], #32 +ld2 { v1.2d, v2.2d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G21 +ld2 { v1.2s, v2.2s }, [x27], x28 +ld2 { v1.4h, v2.4h }, [x27], x28 +ld2 { v1.4s, v2.4s }, [x27], x28 +ld2 { v1.8b, v2.8b }, [x27], x28 +ld2 { v1.8h, v2.8h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G22 +ld2 { v1.16b, v2.16b }, [x27], x28 +ld2 { v1.b, v2.b }[0], [x27], #2 +ld2 { v1.b, v2.b }[8], [x27], #2 +ld2 { v1.b, v2.b }[0], [x27], x28 +ld2 { v1.b, v2.b }[8], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G23 +ld2 { v1.h, v2.h }[0], [x27], #4 +ld2 { v1.h, v2.h }[4], [x27], #4 +ld2 { v1.h, v2.h }[0], [x27], x28 +ld2 { v1.h, v2.h }[4], [x27], x28 +ld2 { v1.s, v2.s }[0], [x27], #8 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G24 +ld2 { v1.s, v2.s }[0], [x27], x28 +ld2 { v1.d, v2.d }[0], [x27], #16 +ld2 { v1.d, v2.d }[0], [x27], x28 +ld2r { v1.1d, v2.1d }, [x27], #16 +ld2r { v1.2d, v2.2d }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G25 +ld2r { v1.2s, v2.2s }, [x27], #8 +ld2r { v1.4h, v2.4h }, [x27], #4 +ld2r { v1.4s, v2.4s }, [x27], #8 +ld2r { v1.8b, v2.8b }, [x27], #2 +ld2r { v1.8h, v2.8h }, [x27], #4 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G26 +ld2r { v1.16b, v2.16b }, [x27], #2 +ld2r { v1.1d, v2.1d }, [x27], x28 +ld2r { v1.2d, v2.2d }, [x27], x28 +ld2r { v1.2s, v2.2s }, [x27], x28 +ld2r { v1.4h, v2.4h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G27 +ld2r { v1.4s, v2.4s }, [x27], x28 +ld2r { v1.8b, v2.8b }, [x27], x28 +ld2r { v1.8h, v2.8h }, [x27], x28 +ld2r { v1.16b, v2.16b }, [x27], x28 +ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G28 +ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G29 +ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G30 +ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G31 +ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 +ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G32 +ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 +ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G33 +ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G34 +ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G35 +ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G36 +ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G37 +ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G38 +ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G39 +ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G40 +ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G41 +ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G42 +ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G43 +ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G44 +ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +ldp s1, s2, [x27], #248 +ldp d1, d2, [x27], #496 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G45 +ldp q1, q2, [x27], #992 +ldp s1, s2, [x27, #248]! +ldp d1, d2, [x27, #496]! +ldp q1, q2, [x27, #992]! +ldp w1, w2, [x27], #248 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G46 +ldp x1, x2, [x27], #496 +ldp w1, w2, [x27, #248]! +ldp x1, x2, [x27, #496]! +ldpsw x1, x2, [x27], #248 +ldpsw x1, x2, [x27, #248]! +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G47 +ldr b1, [x27], #254 +ldr h1, [x27], #254 +ldr s1, [x27], #254 +ldr d1, [x27], #254 +ldr q1, [x27], #254 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G48 +ldr b1, [x27, #254]! +ldr h1, [x27, #254]! +ldr s1, [x27, #254]! +ldr d1, [x27, #254]! +ldr q1, [x27, #254]! +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G49 +ldr w1, [x27], #254 +ldr x1, [x27], #254 +ldr w1, [x27, #254]! +ldr x1, [x27, #254]! +ldrb w1, [x27], #254 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G50 +ldrb w1, [x27, #254]! +ldrh w1, [x27], #254 +ldrh w1, [x27, #254]! +ldrsb w1, [x27], #254 +ldrsb x1, [x27], #254 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G51 +ldrsb w1, [x27, #254]! +ldrsb x1, [x27, #254]! +ldrsh w1, [x27], #254 +ldrsh x1, [x27], #254 +ldrsh w1, [x27, #254]! +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G52 +ldrsh x1, [x27, #254]! +ldrsw x1, [x27], #254 +ldrsw x1, [x27, #254]! +st1 { v1.1d }, [x27], #8 +st1 { v1.2d }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G53 +st1 { v1.2s }, [x27], #8 +st1 { v1.4h }, [x27], #8 +st1 { v1.4s }, [x27], #16 +st1 { v1.8b }, [x27], #8 +st1 { v1.8h }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G54 +st1 { v1.16b }, [x27], #16 +st1 { v1.1d }, [x27], x28 +st1 { v1.2d }, [x27], x28 +st1 { v1.2s }, [x27], x28 +st1 { v1.4h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G55 +st1 { v1.4s }, [x27], x28 +st1 { v1.8b }, [x27], x28 +st1 { v1.8h }, [x27], x28 +st1 { v1.16b }, [x27], x28 +st1 { v1.1d, v2.1d }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G56 +st1 { v1.2d, v2.2d }, [x27], #32 +st1 { v1.2s, v2.2s }, [x27], #16 +st1 { v1.4h, v2.4h }, [x27], #16 +st1 { v1.4s, v2.4s }, [x27], #32 +st1 { v1.8b, v2.8b }, [x27], #16 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G57 +st1 { v1.8h, v2.8h }, [x27], #32 +st1 { v1.16b, v2.16b }, [x27], #32 +st1 { v1.1d, v2.1d }, [x27], x28 +st1 { v1.2d, v2.2d }, [x27], x28 +st1 { v1.2s, v2.2s }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G58 +st1 { v1.4h, v2.4h }, [x27], x28 +st1 { v1.4s, v2.4s }, [x27], x28 +st1 { v1.8b, v2.8b }, [x27], x28 +st1 { v1.8h, v2.8h }, [x27], x28 +st1 { v1.16b, v2.16b }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G59 +st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G60 +st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G61 +st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G62 +st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G63 +st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G64 +st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G65 +st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +st1 { v1.b }[0], [x27], #1 +st1 { v1.b }[8], [x27], #1 +st1 { v1.b }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G66 +st1 { v1.b }[8], [x27], x28 +st1 { v1.h }[0], [x27], #2 +st1 { v1.h }[4], [x27], #2 +st1 { v1.h }[0], [x27], x28 +st1 { v1.h }[4], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G67 +st1 { v1.s }[0], [x27], #4 +st1 { v1.s }[0], [x27], x28 +st1 { v1.d }[0], [x27], #8 +st1 { v1.d }[0], [x27], x28 +st2 { v1.2d, v2.2d }, [x27], #32 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G68 +st2 { v1.2s, v2.2s }, [x27], #16 +st2 { v1.4h, v2.4h }, [x27], #16 +st2 { v1.4s, v2.4s }, [x27], #32 +st2 { v1.8b, v2.8b }, [x27], #16 +st2 { v1.8h, v2.8h }, [x27], #32 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G69 +st2 { v1.16b, v2.16b }, [x27], #32 +st2 { v1.2d, v2.2d }, [x27], x28 +st2 { v1.2s, v2.2s }, [x27], x28 +st2 { v1.4h, v2.4h }, [x27], x28 +st2 { v1.4s, v2.4s }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G70 +st2 { v1.8b, v2.8b }, [x27], x28 +st2 { v1.8h, v2.8h }, [x27], x28 +st2 { v1.16b, v2.16b }, [x27], x28 +st2 { v1.b, v2.b }[0], [x27], #2 +st2 { v1.b, v2.b }[8], [x27], #2 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G71 +st2 { v1.b, v2.b }[0], [x27], x28 +st2 { v1.b, v2.b }[8], [x27], x28 +st2 { v1.h, v2.h }[0], [x27], #4 +st2 { v1.h, v2.h }[4], [x27], #4 +st2 { v1.h, v2.h }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G72 +st2 { v1.h, v2.h }[4], [x27], x28 +st2 { v1.s, v2.s }[0], [x27], #8 +st2 { v1.s, v2.s }[0], [x27], x28 +st2 { v1.d, v2.d }[0], [x27], #16 +st2 { v1.d, v2.d }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G73 +st2g x26, [x27], #4064 +st2g x26, [x27, #4064]! +st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G74 +st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G75 +st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G76 +st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G77 +st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G78 +st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G79 +st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G80 +st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G81 +st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G82 +st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G83 +st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +stg x26, [x27], #4064 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G84 +stg x26, [x27, #4064]! +stgp x1, x2, [x27], #992 +stgp x1, x2, [x27, #992]! +stp s1, s2, [x27], #248 +stp d1, d2, [x27], #496 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G85 +stp q1, q2, [x27], #992 +stp s1, s2, [x27, #248]! +stp d1, d2, [x27, #496]! +stp q1, q2, [x27, #992]! +stp w1, w2, [x27], #248 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G86 +stp x1, x2, [x27], #496 +stp w1, w2, [x27, #248]! +stp x1, x2, [x27, #496]! +str b1, [x27], #254 +str h1, [x27], #254 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G87 +str s1, [x27], #254 +str d1, [x27], #254 +str q1, [x27], #254 +str b1, [x27, #254]! +str h1, [x27, #254]! +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G88 +str s1, [x27, #254]! +str d1, [x27, #254]! +str q1, [x27, #254]! +str w1, [x27], #254 +str x1, [x27], #254 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G89 +str w1, [x27, #254]! +str x1, [x27, #254]! +strb w1, [x27], #254 +strb w1, [x27, #254]! +strh w1, [x27], #254 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G90 +strh w1, [x27, #254]! +stz2g x26, [x27], #4064 +stz2g x26, [x27, #4064]! +stzg x26, [x27], #4064 +stzg x26, [x27, #4064]! +# LLVM-MCA-END + +# LLVM-MCA-BEGIN G91 +ldr x1, [x27], #254 +ldr x2, [x1], #254 +# LLVM-MCA-END + +# CHECK: [0] Code Region - G01 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.4s }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [1] Code Region - G02 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.2d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [2] Code Region - G03 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.8h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [3] Code Region - G04 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1400 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.76 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [4] Code Region - G05 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 3.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [5] Code Region - G06 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 3.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [6] Code Region - G07 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1800 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.54 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [7] Code Region - G08 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [8] Code Region - G09 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [9] Code Region - G10 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 608 +# CHECK-NEXT: Total uOps: 2200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.62 +# CHECK-NEXT: IPC: 0.82 +# CHECK-NEXT: Block RThroughput: 5.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeER. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,4] .D===eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 1 2.6 0.2 0.0 + +# CHECK: [10] Code Region - G11 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 675 +# CHECK-NEXT: Total uOps: 2500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.70 +# CHECK-NEXT: IPC: 0.74 +# CHECK-NEXT: Block RThroughput: 6.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,2] D==eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,4] .D====eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 1 3.0 0.4 0.0 + +# CHECK: [11] Code Region - G12 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 675 +# CHECK-NEXT: Total uOps: 2500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.70 +# CHECK-NEXT: IPC: 0.74 +# CHECK-NEXT: Block RThroughput: 6.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeER . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,4] .D====eeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.4 0.0 + +# CHECK: [12] Code Region - G13 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1210 +# CHECK-NEXT: Total uOps: 2300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.90 +# CHECK-NEXT: IPC: 0.41 +# CHECK-NEXT: Block RThroughput: 5.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeER. . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D===eeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,4] .D==========eeeeeeeeER ld1 { v1.b }[0], [x27], #1 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 1 4.2 0.4 0.0 + +# CHECK: [13] Code Region - G14 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.37 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,4] D================================eeeeeeeeER ld1 { v1.h }[4], [x27], #2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 4. 1 33.0 0.0 0.0 ld1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 1 17.0 0.2 0.0 + +# CHECK: [14] Code Region - G15 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.37 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,4] D================================eeeeeeeeER ld1 { v1.d }[0], [x27], #8 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 4. 1 33.0 0.0 0.0 ld1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 1 17.0 0.2 0.0 + +# CHECK: [15] Code Region - G16 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1203 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.25 +# CHECK-NEXT: IPC: 0.42 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,4] D====eeeeeeeeER ld1r { v1.4h }, [x27], #2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [16] Code Region - G17 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.94 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.4s }, [x27], #4 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,4] D====eeeeeeeeER ld1r { v1.1d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [17] Code Region - G18 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.94 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeeeER ld1r { v1.8b }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [18] Code Region - G19 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 1900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.73 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 2.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [19] Code Region - G20 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 2400 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.71 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.2 0.0 + +# CHECK: [20] Code Region - G21 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 2200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.31 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.2 0.0 + +# CHECK: [21] Code Region - G22 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 3310 +# CHECK-NEXT: Total uOps: 2100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.63 +# CHECK-NEXT: IPC: 0.15 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2 { v1.b, v2.b }[8], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 1 16.6 0.2 0.0 + +# CHECK: [22] Code Region - G23 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.50 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2 { v1.s, v2.s }[0], [x27], #8 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 1 16.8 0.2 0.0 + +# CHECK: [23] Code Region - G24 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 2603 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.77 +# CHECK-NEXT: IPC: 0.19 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 + +# CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,2] D================eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] D=================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,4] .D=================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 3. 1 18.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 4. 1 18.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: 1 12.6 0.2 0.0 + +# CHECK: [24] Code Region - G25 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [25] Code Region - G26 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [26] Code Region - G27 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 2300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.51 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 2.8 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [27] Code Region - G28 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 3200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.27 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 1 2.2 0.2 0.0 + +# CHECK: [28] Code Region - G29 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 3300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.47 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 1 2.2 0.2 0.0 + +# CHECK: [29] Code Region - G30 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1910 +# CHECK-NEXT: Total uOps: 3200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.68 +# CHECK-NEXT: IPC: 0.26 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 + +# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,4] . D================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 1 6.4 0.2 0.0 + +# CHECK: [30] Code Region - G31 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 3.8 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 1 16.2 0.2 0.0 + +# CHECK: [31] Code Region - G32 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 0.75 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 3.8 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 1 16.2 0.2 0.0 + +# CHECK: [32] Code Region - G33 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 3200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.27 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: 1 2.2 0.2 0.0 + +# CHECK: [33] Code Region - G34 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 3300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.47 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 1 2.2 0.2 0.0 + +# CHECK: [34] Code Region - G35 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 510 +# CHECK-NEXT: Total uOps: 3200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.27 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 1 2.2 0.2 0.0 + +# CHECK: [35] Code Region - G36 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 710 +# CHECK-NEXT: Total uOps: 4500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.34 +# CHECK-NEXT: IPC: 0.70 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,3] . D=eeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,4] . D==eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 1 1.6 0.4 0.0 + +# CHECK: [36] Code Region - G37 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 810 +# CHECK-NEXT: Total uOps: 4900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.05 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 8.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234567 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,2] . DeeeeeeeeeER . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,3] . DeeeeeeeeeER. . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 3.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 1 1.6 0.8 0.0 + +# CHECK: [37] Code Region - G38 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 809 +# CHECK-NEXT: Total uOps: 4900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.06 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 8.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,4] . D=eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 1 1.2 0.4 0.0 + +# CHECK: [38] Code Region - G39 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 4000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 1 16.2 0.2 0.0 + +# CHECK: [39] Code Region - G40 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total uOps: 4000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 012 + +# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 1 16.2 0.2 0.0 + +# CHECK: [40] Code Region - G41 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1903 +# CHECK-NEXT: Total uOps: 4100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.15 +# CHECK-NEXT: IPC: 0.26 +# CHECK-NEXT: Block RThroughput: 5.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,1] D========eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,2] .D========eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] . D========eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,4] . D========eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 3. 1 9.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: 1 7.4 0.2 0.0 + +# CHECK: [41] Code Region - G42 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 659 +# CHECK-NEXT: Total uOps: 4300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.53 +# CHECK-NEXT: IPC: 0.76 +# CHECK-NEXT: Block RThroughput: 6.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,3] . DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,4] . D=eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: 1 1.2 0.4 0.0 + +# CHECK: [42] Code Region - G43 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 610 +# CHECK-NEXT: Total uOps: 4200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.89 +# CHECK-NEXT: IPC: 0.82 +# CHECK-NEXT: Block RThroughput: 5.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . D=eeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 1 1.6 0.4 0.0 + +# CHECK: [43] Code Region - G44 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 3400 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 6.69 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 4.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,2] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . D=eeeeeeE-R ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,4] . D=eeeeeeER ldp d1, d2, [x27], #496 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp d1, d2, [x27], #496 +# CHECK-NEXT: 1 1.4 0.2 0.2 + +# CHECK: [44] Code Region - G45 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total uOps: 2300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.54 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.3 + +# CHECK: Timeline view: +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] D=eeeeeeER.. ldp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,2] D==eeeeeeER. ldp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,3] .D==eeeeeeER ldp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,4] .D===eeeeE-R ldp w1, w2, [x27], #248 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldp d1, d2, [x27, #496]! +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]! +# CHECK-NEXT: 4. 1 4.0 0.0 1.0 ldp w1, w2, [x27], #248 +# CHECK-NEXT: 1 2.6 0.2 0.2 + +# CHECK: [45] Code Region - G46 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 507 +# CHECK-NEXT: Total uOps: 2100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.14 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] D=eeeeER .. ldp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,2] D==eeeeER .. ldp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,3] D===eeeeeER. ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: [0,4] .D===eeeeeER ldpsw x1, x2, [x27, #248]! + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp x1, x2, [x27], #496 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [46] Code Region - G47 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27], #254 +# CHECK-NEXT: [0,3] D===eeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,4] D====eeeeeeER ldr q1, [x27], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27], #254 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27], #254 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [47] Code Region - G48 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 508 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27, #254]! +# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eeeeeeER. ldr d1, [x27, #254]! +# CHECK-NEXT: [0,4] D====eeeeeeER ldr q1, [x27, #254]! + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr d1, [x27, #254]! +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldr q1, [x27, #254]! +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [48] Code Region - G49 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total uOps: 1000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . ldr w1, [x27], #254 +# CHECK-NEXT: [0,1] D=eeeeER . ldr x1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeeeER . ldr w1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eeeeER. ldr x1, [x27, #254]! +# CHECK-NEXT: [0,4] D====eeeeER ldrb w1, [x27], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr w1, [x27], #254 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr x1, [x27], #254 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr w1, [x27, #254]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr x1, [x27, #254]! +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrb w1, [x27], #254 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [49] Code Region - G50 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total uOps: 1000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . ldrb w1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeeeER . ldrh w1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeeeER . ldrh w1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eeeeER. ldrsb w1, [x27], #254 +# CHECK-NEXT: [0,4] D====eeeeER ldrsb x1, [x27], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrb w1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrh w1, [x27], #254 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrh w1, [x27, #254]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldrsb w1, [x27], #254 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrsb x1, [x27], #254 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [50] Code Region - G51 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total uOps: 1000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 1.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 1.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . ldrsb w1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeeeER . ldrsb x1, [x27, #254]! +# CHECK-NEXT: [0,2] D==eeeeER . ldrsh w1, [x27], #254 +# CHECK-NEXT: [0,3] D===eeeeER. ldrsh x1, [x27], #254 +# CHECK-NEXT: [0,4] D====eeeeER ldrsh w1, [x27, #254]! + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsb w1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsb x1, [x27, #254]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsh w1, [x27], #254 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldrsh x1, [x27], #254 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldrsh w1, [x27, #254]! +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [51] Code Region - G52 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1200 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.38 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeeeER. ldrsw x1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeeeER ldrsw x1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eeE-R st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.2d }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldrsh x1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! +# CHECK-NEXT: 3. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.2 + +# CHECK: [52] Code Region - G53 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.8h }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [53] Code Region - G54 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.4h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [54] Code Region - G55 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.1d, v2.1d }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [55] Code Region - G56 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.77 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 3.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,4] .D===eeER st1 { v1.8b, v2.8b }, [x27], #16 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [56] Code Region - G57 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 2100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.17 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeER st1 { v1.2s, v2.2s }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.2 0.0 + +# CHECK: [57] Code Region - G58 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 2100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.17 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 4.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeER st1 { v1.16b, v2.16b }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [58] Code Region - G59 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total uOps: 2900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.13 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 6.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,3] .D===eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 1 2.6 0.4 0.0 + +# CHECK: [59] Code Region - G60 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total uOps: 3100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.41 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 6.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,3] .D===eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.4 0.0 + +# CHECK: [60] Code Region - G61 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total uOps: 2900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.13 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 6.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 1 2.4 0.4 0.0 + +# CHECK: [61] Code Region - G62 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total uOps: 3100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.40 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 6.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,2] .D==eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] .D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,4] . D====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 4. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 1 3.0 0.6 0.0 + +# CHECK: [62] Code Region - G63 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 804 +# CHECK-NEXT: Total uOps: 3700 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.60 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 8.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,1] D=eeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,2] .D==eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] . D==eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,4] . D=====eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 4. 1 6.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.8 0.0 + +# CHECK: [63] Code Region - G64 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total uOps: 3300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.69 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,2] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D===eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.4 0.0 + +# CHECK: [64] Code Region - G65 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 706 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 4.25 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,2] .D===eeeeER . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,3] . D===eeeeER. st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,4] . D====eeeeER st1 { v1.b }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 1 3.0 0.6 0.0 + +# CHECK: [65] Code Region - G66 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,2] D==eeeeER . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,3] D===eeeeER. st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeER st1 { v1.h }[4], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [66] Code Region - G67 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 605 +# CHECK-NEXT: Total uOps: 2300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.80 +# CHECK-NEXT: IPC: 0.83 +# CHECK-NEXT: Block RThroughput: 6.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeER . st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeER. st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeER st2 { v1.2d, v2.2d }, [x27], #32 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [67] Code Region - G68 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 705 +# CHECK-NEXT: Total uOps: 2600 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.69 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER .. st2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,1] D=eeeeER .. st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeER .. st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,3] .D==eeeeER.. st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,4] .D====eeeeER st2 { v1.8h, v2.8h }, [x27], #32 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 4. 1 5.0 1.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 1 2.8 0.4 0.0 + +# CHECK: [68] Code Region - G69 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 805 +# CHECK-NEXT: Total uOps: 2900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.60 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 8.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . . st2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeER . . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,2] .D===eeeeER . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D====eeeeER. st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,4] .D=====eeeeER st2 { v1.4s, v2.4s }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 1 3.6 0.6 0.0 + +# CHECK: [69] Code Region - G70 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 706 +# CHECK-NEXT: Total uOps: 2600 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.68 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . . st2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeER . . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D====eeeeER. st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,4] .D=====eeeeER st2 { v1.b, v2.b }[8], [x27], #2 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 5.0 2.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 1 3.2 0.6 0.0 + +# CHECK: [70] Code Region - G71 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,3] D===eeeeER. st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,4] .D===eeeeER st2 { v1.h, v2.h }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [71] Code Region - G72 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 506 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 5.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,2] D==eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,4] .D===eeeeER st2 { v1.d, v2.d }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [72] Code Region - G73 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 807 +# CHECK-NEXT: Total uOps: 3000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.72 +# CHECK-NEXT: IPC: 0.62 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeER . . . st2g x26, [x27], #4064 +# CHECK-NEXT: [0,1] D=eER. . . st2g x26, [x27, #4064]! +# CHECK-NEXT: [0,2] D==eeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] .D==eeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,4] .D======eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2g x26, [x27], #4064 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2g x26, [x27, #4064]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 4. 1 7.0 3.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 1 3.2 0.8 0.0 + +# CHECK: [73] Code Region - G74 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1405 +# CHECK-NEXT: Total uOps: 4700 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.35 +# CHECK-NEXT: IPC: 0.36 +# CHECK-NEXT: Block RThroughput: 14.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,1] .DeeeeeER . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,2] . D===eeeeeeER . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] . D===eeeeeeER. . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,4] . D=======eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 2. 1 4.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 4. 1 8.0 4.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 1 3.6 1.6 0.0 + +# CHECK: [74] Code Region - G75 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1206 +# CHECK-NEXT: Total uOps: 4100 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.40 +# CHECK-NEXT: IPC: 0.41 +# CHECK-NEXT: Block RThroughput: 12.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234567 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeER . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeER . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,2] .D===eeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . D===eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,4] . D======eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 7.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 1 3.6 1.2 0.0 + +# CHECK: [75] Code Region - G76 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1106 +# CHECK-NEXT: Total uOps: 3800 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.44 +# CHECK-NEXT: IPC: 0.45 +# CHECK-NEXT: Block RThroughput: 11.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeER . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,2] .D====eeeeeER .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,3] . D====eeeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,4] . D=======eeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 2. 1 5.0 3.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 4. 1 8.0 2.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 1 4.0 1.2 0.0 + +# CHECK: [76] Code Region - G77 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1005 +# CHECK-NEXT: Total uOps: 3500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.48 +# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: Block RThroughput: 10.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,1] D=eeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,2] .D===eeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D====eeeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,4] . D======eeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 4. 1 7.0 2.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 1 3.8 1.0 0.0 + +# CHECK: [77] Code Region - G78 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1304 +# CHECK-NEXT: Total uOps: 4300 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.30 +# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: Block RThroughput: 13.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeER . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeER . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,2] .D===eeeeeER .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D===eeeeeER .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,4] . D=====eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 4. 1 6.0 2.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 1 3.4 1.0 0.0 + +# CHECK: [78] Code Region - G79 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 2399 +# CHECK-NEXT: Total uOps: 6900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.88 +# CHECK-NEXT: IPC: 0.21 +# CHECK-NEXT: Block RThroughput: 24.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012 + +# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,1] .DeeeeeeeER . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,2] . D====eeeeeeER. . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,3] . D=========eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,4] . D========eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 2. 1 5.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 3. 1 10.0 5.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 1 5.2 2.0 0.0 + +# CHECK: [79] Code Region - G80 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1903 +# CHECK-NEXT: Total uOps: 5700 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.00 +# CHECK-NEXT: IPC: 0.26 +# CHECK-NEXT: Block RThroughput: 19.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 + +# CHECK: [0,0] DeeeeeER . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeER. . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,2] . D=====eeeeeeER .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . D=====eeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,4] . D=========eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 6.0 5.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 10.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 1 4.8 2.0 0.0 + +# CHECK: [80] Code Region - G81 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 1658 +# CHECK-NEXT: Total uOps: 4900 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.96 +# CHECK-NEXT: IPC: 0.30 +# CHECK-NEXT: Block RThroughput: 16.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123 + +# CHECK: [0,0] DeeeeeeeER. . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DeeeeeeeER . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,2] . D=========eeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,3] . D===========eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,4] . D============eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 10.0 9.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 3. 1 12.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 4. 1 13.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 1 7.4 2.4 0.0 + +# CHECK: [81] Code Region - G82 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 757 +# CHECK-NEXT: Total uOps: 2500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.30 +# CHECK-NEXT: IPC: 0.66 +# CHECK-NEXT: Block RThroughput: 7.5 + +# CHECK: Timeline view: +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,2] D===eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,3] .D===eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D=====eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 2. 1 4.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 4. 1 6.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 1 3.4 0.6 0.0 + +# CHECK: [82] Code Region - G83 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 704 +# CHECK-NEXT: Total uOps: 2700 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.84 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 7.0 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,1] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,2] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,3] .D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,4] . D===eE--R stg x26, [x27], #4064 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 2.0 stg x26, [x27], #4064 +# CHECK-NEXT: 1 2.8 0.4 0.4 + +# CHECK: [83] Code Region - G84 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1700 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.37 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeER . . stg x26, [x27, #4064]! +# CHECK-NEXT: [0,1] D=eER. . stgp x1, x2, [x27], #992 +# CHECK-NEXT: [0,2] D==eER . stgp x1, x2, [x27, #992]! +# CHECK-NEXT: [0,3] D===eeER. stp s1, s2, [x27], #248 +# CHECK-NEXT: [0,4] .D===eeER stp d1, d2, [x27], #496 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stg x26, [x27, #4064]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stgp x1, x2, [x27], #992 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stgp x1, x2, [x27, #992]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 stp s1, s2, [x27], #248 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 stp d1, d2, [x27], #496 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [84] Code Region - G85 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.84 +# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeER. . stp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] D==eeER . stp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,2] D===eeER . stp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,3] .D===eeER. stp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,4] .D=====eER stp w1, w2, [x27], #248 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp q1, q2, [x27], #992 +# CHECK-NEXT: 1. 1 3.0 0.0 0.0 stp s1, s2, [x27, #248]! +# CHECK-NEXT: 2. 1 4.0 0.0 0.0 stp d1, d2, [x27, #496]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 stp q1, q2, [x27, #992]! +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 stp w1, w2, [x27], #248 +# CHECK-NEXT: 1 3.6 0.2 0.0 + +# CHECK: [85] Code Region - G86 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 1700 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.37 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeER . . stp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] D=eER. . stp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,2] D==eER . stp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,3] D===eeER. str b1, [x27], #254 +# CHECK-NEXT: [0,4] .D===eeER str h1, [x27], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 stp x1, x2, [x27], #496 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stp w1, w2, [x27, #248]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stp x1, x2, [x27, #496]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 str b1, [x27], #254 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str h1, [x27], #254 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [86] Code Region - G87 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 504 +# CHECK-NEXT: Total uOps: 2000 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.97 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 012345678 + +# CHECK: [0,0] DeeER. . str s1, [x27], #254 +# CHECK-NEXT: [0,1] D=eeER . str d1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeER . str q1, [x27], #254 +# CHECK-NEXT: [0,3] D===eeER. str b1, [x27, #254]! +# CHECK-NEXT: [0,4] .D===eeER str h1, [x27, #254]! + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27], #254 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27], #254 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str q1, [x27], #254 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 str b1, [x27, #254]! +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str h1, [x27, #254]! +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [87] Code Region - G88 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 503 +# CHECK-NEXT: Total uOps: 1800 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.58 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeER. . str s1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeER . str d1, [x27, #254]! +# CHECK-NEXT: [0,2] D==eeER. str q1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eER. str w1, [x27], #254 +# CHECK-NEXT: [0,4] .D===eER str x1, [x27], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str s1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str d1, [x27, #254]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 str q1, [x27, #254]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 str w1, [x27], #254 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 str x1, [x27], #254 +# CHECK-NEXT: 1 2.8 0.2 0.0 + +# CHECK: [88] Code Region - G89 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 503 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeER . . str w1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eER. . str x1, [x27, #254]! +# CHECK-NEXT: [0,2] D==eER . strb w1, [x27], #254 +# CHECK-NEXT: [0,3] D===eER. strb w1, [x27, #254]! +# CHECK-NEXT: [0,4] D====eER strh w1, [x27], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 str w1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 str x1, [x27, #254]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 strb w1, [x27], #254 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 strb w1, [x27, #254]! +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 strh w1, [x27], #254 +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [89] Code Region - G90 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 500 +# CHECK-NEXT: Total Cycles: 503 +# CHECK-NEXT: Total uOps: 1500 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: Block RThroughput: 2.5 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeER . . strh w1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eER. . stz2g x26, [x27], #4064 +# CHECK-NEXT: [0,2] D==eER . stz2g x26, [x27, #4064]! +# CHECK-NEXT: [0,3] D===eER. stzg x26, [x27], #4064 +# CHECK-NEXT: [0,4] D====eER stzg x26, [x27, #4064]! + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 strh w1, [x27, #254]! +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 stz2g x26, [x27], #4064 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 stz2g x26, [x27, #4064]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 stzg x26, [x27], #4064 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 stzg x26, [x27, #4064]! +# CHECK-NEXT: 1 3.0 0.2 0.0 + +# CHECK: [90] Code Region - G91 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 200 +# CHECK-NEXT: Total Cycles: 110 +# CHECK-NEXT: Total uOps: 400 + +# CHECK: Dispatch Width: 16 +# CHECK-NEXT: uOps Per Cycle: 3.64 +# CHECK-NEXT: IPC: 1.82 +# CHECK-NEXT: Block RThroughput: 0.7 + +# CHECK: Timeline view: +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeeER . ldr x1, [x27], #254 +# CHECK-NEXT: [0,1] D====eeeeER ldr x2, [x1], #254 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr x1, [x27], #254 +# CHECK-NEXT: 1. 1 5.0 0.0 0.0 ldr x2, [x1], #254 +# CHECK-NEXT: 1 3.0 0.5 0.0