Index: llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -1213,7 +1213,7 @@ def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>; // Load pair, immed post-index or immed pre-index, signed words -def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr], +def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi], (instregex "^LDPSW(post|pre)$")>; // Store instructions @@ -1224,7 +1224,7 @@ def : SchedAlias; def : SchedAlias; def : SchedAlias; -def : SchedAlias; // copied from A57. +def : SchedAlias; // Tag load instructions // ----------------------------------------------------------------------------- @@ -1337,7 +1337,7 @@ // Load vector reg, immed post-index // Load vector reg, immed pre-index -def : InstRW<[V2Write_6cyc_1I_1L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ](pre|post)$")>; // Load vector reg, unsigned immed @@ -1359,12 +1359,12 @@ // Load vector pair, immed post-index, S/D-form // Load vector pair, immed pre-index, S/D-form -def : InstRW<[V2Write_6cyc_1I_1L, WriteLDHi, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi], (instregex "^LDP[SD](pre|post)$")>; // Load vector pair, immed post-index, Q-form // Load vector pair, immed pre-index, Q-form -def : InstRW<[V2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost, +def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost, LDPQpre)>; // FP store instructions @@ -1725,220 +1725,220 @@ // ASIMD load, 1 element, multiple, 1 reg, D-form def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_6cyc_1L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 1 reg, Q-form def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_1L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, D-form def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_6cyc_2L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 2 reg, Q-form def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_2L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, D-form def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_6cyc_3L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 3 reg, Q-form def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_3L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, D-form def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_7cyc_4L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, multiple, 4 reg, Q-form def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_7cyc_4L, WriteAdr], +def : InstRW<[WriteAdr, V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 1 element, one lane, B/H/S // ASIMD load, 1 element, one lane, D def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>; // ASIMD load, 1 element, all lanes, D-form, B/H/S // ASIMD load, 1 element, all lanes, D-form, D def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 1 element, all lanes, Q-form def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, multiple, D-form, B/H/S def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>; // ASIMD load, 2 element, multiple, Q-form, B/H/S // ASIMD load, 2 element, multiple, Q-form, D def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; // ASIMD load, 2 element, one lane, B/H // ASIMD load, 2 element, one lane, S // ASIMD load, 2 element, one lane, D def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>; // ASIMD load, 2 element, all lanes, D-form, B/H/S // ASIMD load, 2 element, all lanes, D-form, D def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 2 element, all lanes, Q-form def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, multiple, D-form, B/H/S def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; -def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>; // ASIMD load, 3 element, multiple, Q-form, B/H/S // ASIMD load, 3 element, multiple, Q-form, D def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; // ASIMD load, 3 element, one lane, B/H // ASIMD load, 3 element, one lane, S // ASIMD load, 3 element, one lane, D def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>; // ASIMD load, 3 element, all lanes, D-form, B/H/S // ASIMD load, 3 element, all lanes, D-form, D def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 3 element, all lanes, Q-form, B/H/S // ASIMD load, 3 element, all lanes, Q-form, D def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, multiple, D-form, B/H/S def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; -def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; // ASIMD load, 4 element, multiple, Q-form, B/H/S // ASIMD load, 4 element, multiple, Q-form, D def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_9cyc_6L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD load, 4 element, one lane, B/H // ASIMD load, 4 element, one lane, S // ASIMD load, 4 element, one lane, D def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; -def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>; // ASIMD load, 4 element, all lanes, D-form, B/H/S // ASIMD load, 4 element, all lanes, D-form, D -def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; // ASIMD load, 4 element, all lanes, Q-form, B/H/S // ASIMD load, 4 element, all lanes, Q-form, D -def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; // ASIMD store instructions // ----------------------------------------------------------------------------- // ASIMD store, 1 element, multiple, 1 reg, D-form def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 1 reg, Q-form def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, D-form def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 2 reg, Q-form def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, D-form def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 3 reg, Q-form def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_3L01_3V01, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, D-form def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; // ASIMD store, 1 element, multiple, 4 reg, Q-form def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_2cyc_4L01_4V01, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; // ASIMD store, 1 element, one lane, B/H/S // ASIMD store, 1 element, one lane, D def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>; -def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)_POST$")>; // ASIMD store, 2 element, multiple, D-form, B/H/S def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>; -def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)_POST$")>; // ASIMD store, 2 element, multiple, Q-form, B/H/S // ASIMD store, 2 element, multiple, Q-form, D def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; // ASIMD store, 2 element, one lane, B/H/S // ASIMD store, 2 element, one lane, D def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>; -def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)_POST$")>; // ASIMD store, 3 element, multiple, D-form, B/H/S def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>; -def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)_POST$")>; // ASIMD store, 3 element, multiple, Q-form, B/H/S // ASIMD store, 3 element, multiple, Q-form, D def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>; -def : InstRW<[V2Write_6cyc_3L01_6V01, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; // ASIMD store, 3 element, one lane, B/H // ASIMD store, 3 element, one lane, S // ASIMD store, 3 element, one lane, D def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>; -def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)_POST$")>; // ASIMD store, 4 element, multiple, D-form, B/H/S def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>; -def : InstRW<[V2Write_6cyc_2L01_6V01, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; // ASIMD store, 4 element, multiple, Q-form, B/H/S def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>; -def : InstRW<[V2Write_7cyc_4L01_12V01, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; // ASIMD store, 4 element, multiple, Q-form, D def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>; -def : InstRW<[V2Write_5cyc_4L01_8V01, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; +def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)_POST$")>; // ASIMD store, 4 element, one lane, B/H/S def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>; -def : InstRW<[V2Write_6cyc_1L01_3V01, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>; +def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)_POST$")>; // ASIMD store, 4 element, one lane, D def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>; -def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST4i(64)_POST$")>; +def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)_POST$")>; // Cryptography extensions // ----------------------------------------------------------------------------- Index: llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-writeback.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-writeback.s +++ llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-writeback.s @@ -725,23 +725,23 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.33 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,4] D========================eeeeeeER ld1 { v1.4s }, [x27], #16 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.4s }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -751,33 +751,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2s }, [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [1] Code Region - G02 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.33 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,4] D========================eeeeeeER ld1 { v1.2d }, [x27], x28 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.2d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -787,33 +787,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b }, [x27], #8 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.16b }, [x27], #16 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [2] Code Region - G03 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.33 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 1.97 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,4] D========================eeeeeeER ld1 { v1.8h }, [x27], x28 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.8h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -823,33 +823,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [3] Code Region - G04 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1400 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.47 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 2.76 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,4] D========================eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.4h, v2.4h }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -859,33 +859,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [4] Code Region - G05 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 3.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,4] D========================eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.1d, v2.1d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -895,33 +895,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [5] Code Region - G06 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 3.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,4] D========================eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeER ld1 { v1.8b, v2.8b }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -931,33 +931,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [6] Code Region - G07 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1800 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 3.54 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,4] .D=======================eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -967,33 +967,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 4. 1 24.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 1 12.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [7] Code Region - G08 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,4] .D=======================eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1003,33 +1003,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 4. 1 24.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 1 12.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [8] Code Region - G09 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.67 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 3.94 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,4] .D=======================eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeER. ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeER ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1039,33 +1039,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 4. 1 24.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 1 12.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [9] Code Region - G10 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3203 +# CHECK-NEXT: Total Cycles: 608 # CHECK-NEXT: Total uOps: 2200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.69 -# CHECK-NEXT: IPC: 0.16 +# CHECK-NEXT: uOps Per Cycle: 3.62 +# CHECK-NEXT: IPC: 0.82 # CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01234 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=================eeeeeeeER . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,4] .D========================eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK: [0,0] DeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeER . ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeER. ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,4] .D===eeeeeeeER ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1075,33 +1075,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 18.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 1 12.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 1 2.6 0.2 0.0 # CHECK: [10] Code Region - G11 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3503 +# CHECK-NEXT: Total Cycles: 675 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.71 -# CHECK-NEXT: IPC: 0.14 +# CHECK-NEXT: uOps Per Cycle: 3.70 +# CHECK-NEXT: IPC: 0.74 # CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01234567 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,1] D=======eeeeeeeER . . . . . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,2] D==============eeeeeeeER . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,3] .D====================eeeeeeeER . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,4] .D===========================eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,2] D==eeeeeeeER . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,4] .D====eeeeeeeER ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1111,33 +1111,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 3. 1 21.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 4. 1 28.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 1 14.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 1 3.0 0.4 0.0 # CHECK: [11] Code Region - G12 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3503 +# CHECK-NEXT: Total Cycles: 675 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.71 -# CHECK-NEXT: IPC: 0.14 +# CHECK-NEXT: uOps Per Cycle: 3.70 +# CHECK-NEXT: IPC: 0.74 # CHECK-NEXT: Block RThroughput: 6.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01234567 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,1] D=======eeeeeeeER . . . . . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,2] D==============eeeeeeeER . . . . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,3] .D====================eeeeeeeER . . ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,4] .D===========================eeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK: [0,0] DeeeeeeeER. . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,1] D=eeeeeeeER . ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeER . ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,3] .D===eeeeeeeER. ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,4] .D====eeeeeeeER ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1147,33 +1147,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 21.0 0.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 4. 1 28.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 1 14.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.4 0.0 # CHECK: [12] Code Region - G13 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3603 +# CHECK-NEXT: Total Cycles: 1210 # CHECK-NEXT: Total uOps: 2300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.64 -# CHECK-NEXT: IPC: 0.14 +# CHECK-NEXT: uOps Per Cycle: 1.90 +# CHECK-NEXT: IPC: 0.41 # CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345678 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 -# CHECK: [0,0] DeeeeeeeER. . . . . . . ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D=======eeeeeeeER . . . . . ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,2] D==============eeeeeeeER . . . . ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,3] .D====================eeeeeeeER . . ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,4] .D===========================eeeeeeeeER ld1 { v1.b }[0], [x27], #1 +# CHECK: [0,0] DeeeeeeeER. . .. ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeER . .. ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeER . .. ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,3] .D===eeeeeeeER . .. ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,4] .D==========eeeeeeeeER ld1 { v1.b }[0], [x27], #1 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1183,11 +1183,11 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 21.0 0.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 4. 1 28.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 1 14.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 ld1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 11.0 0.0 0.0 ld1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 1 4.2 0.4 0.0 # CHECK: [13] Code Region - G14 @@ -1265,23 +1265,23 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 1203 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.37 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 1.25 +# CHECK-NEXT: IPC: 0.42 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: [0,4] D================================eeeeeeeeER ld1r { v1.4h }, [x27], #2 +# CHECK: [0,0] DeeeeeeeeER . ld1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: [0,4] D====eeeeeeeeER ld1r { v1.4h }, [x27], #2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1291,33 +1291,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 -# CHECK-NEXT: 4. 1 33.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 -# CHECK-NEXT: 1 17.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.1d }, [x27], #8 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.2d }, [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1r { v1.2s }, [x27], #4 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1r { v1.4h }, [x27], #2 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [16] Code Region - G17 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.37 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 2.94 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: [0,4] D================================eeeeeeeeER ld1r { v1.1d }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.4s }, [x27], #4 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: [0,4] D====eeeeeeeeER ld1r { v1.1d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1327,33 +1327,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.4s }, [x27], #4 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 -# CHECK-NEXT: 4. 1 33.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 -# CHECK-NEXT: 1 17.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.8b }, [x27], #1 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.8h }, [x27], #2 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1r { v1.16b }, [x27], #1 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1r { v1.1d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [17] Code Region - G18 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.37 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 2.94 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,4] D================================eeeeeeeeER ld1r { v1.8b }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeeeeeeeER ld1r { v1.8b }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1363,33 +1363,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 -# CHECK-NEXT: 4. 1 33.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 -# CHECK-NEXT: 1 17.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld1r { v1.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld1r { v1.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ld1r { v1.8b }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [18] Code Region - G19 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 1900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.47 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 3.73 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK: [0,0] DeeeeeeeeER . ld1r { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2 { v1.4h, v2.4h }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1399,33 +1399,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld1r { v1.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 1 16.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld1r { v1.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [19] Code Region - G20 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2400 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.60 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 4.71 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2 { v1.2d, v2.2d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1435,33 +1435,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 1 16.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.2 0.0 # CHECK: [20] Code Region - G21 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.55 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 4.31 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2 { v1.8h, v2.8h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1471,22 +1471,22 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 1 16.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.2 0.0 # CHECK: [21] Code Region - G22 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 3310 # CHECK-NEXT: Total uOps: 2100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.52 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 0.63 +# CHECK-NEXT: IPC: 0.15 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: @@ -1553,23 +1553,23 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 2603 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 0.77 +# CHECK-NEXT: IPC: 0.19 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK: [0,0] DeeeeeeeeER . . . . ld2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . ld2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,2] D================eeeeeeeeER . ld2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] D=================eeeeeeeeER. ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: [0,4] .D=================eeeeeeeeER ld2r { v1.2d, v2.2d }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1581,31 +1581,31 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2 { v1.s, v2.s }[0], [x27], x28 # CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], #16 # CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 -# CHECK-NEXT: 1 16.8 0.2 0.0 +# CHECK-NEXT: 3. 1 18.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 4. 1 18.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], #16 +# CHECK-NEXT: 1 12.6 0.2 0.0 # CHECK: [24] Code Region - G25 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.2s, v2.2s }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2r { v1.8h, v2.8h }, [x27], #4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1615,33 +1615,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.2s, v2.2s }, [x27], #8 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 -# CHECK-NEXT: 1 16.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], #4 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2r { v1.4s, v2.4s }, [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], #2 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], #4 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [25] Code Region - G26 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 3.92 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.16b, v2.16b }, [x27], #2 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld2r { v1.4h, v2.4h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1651,33 +1651,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.16b, v2.16b }, [x27], #2 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 1 16.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2r { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2r { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld2r { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [26] Code Region - G27 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 2300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.57 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 4.51 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 2.8 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,2] D================eeeeeeeeER . . . . ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D========================eeeeeeeeER. . . ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,4] .D===============================eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK: [0,0] DeeeeeeeeER . ld2r { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeeeeeER . ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeeeeeER. ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeeeeeER ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1687,33 +1687,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld2r { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 2. 1 17.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 25.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 1 16.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld2r { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ld2r { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ld2r { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [27] Code Region - G28 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.80 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.27 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1723,33 +1723,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 1 2.2 0.2 0.0 # CHECK: [28] Code Region - G29 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.82 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.47 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1759,33 +1759,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 1 2.2 0.2 0.0 # CHECK: [29] Code Region - G30 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 1910 # CHECK-NEXT: Total uOps: 3200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.80 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 1.68 +# CHECK-NEXT: IPC: 0.26 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012345678 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK: [0,0] DeeeeeeeeER . . . . ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . . . . ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . . . . ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D=========eeeeeeeeER . . ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,4] . D================eeeeeeeeER ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1795,11 +1795,11 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 10.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 4. 1 17.0 0.0 0.0 ld3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 1 6.4 0.2 0.0 # CHECK: [30] Code Region - G31 @@ -1877,23 +1877,23 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.80 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.27 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1903,33 +1903,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], #24 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], #12 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], #6 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], #12 +# CHECK-NEXT: 1 2.2 0.2 0.0 # CHECK: [33] Code Region - G34 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.82 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.47 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1939,33 +1939,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], #3 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], #6 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], #3 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3r { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 1 2.2 0.2 0.0 # CHECK: [34] Code Region - G35 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 510 # CHECK-NEXT: Total uOps: 3200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.80 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.27 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=======================eeeeeeeeER. . . ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeeeER . ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeeeeeER . ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeeeeeeeER. ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -1975,33 +1975,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ld3r { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 ld3r { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ld3r { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 0.0 0.0 ld3r { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 1 2.2 0.2 0.0 # CHECK: [35] Code Region - G36 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4203 +# CHECK-NEXT: Total Cycles: 710 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.07 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.34 +# CHECK-NEXT: IPC: 0.70 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 01234 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] .D=======eeeeeeeeeER. . . . . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,2] . D===============eeeeeeeeER . . . . ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,3] . D=======================eeeeeeeeER . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK: [0,0] DeeeeeeeeER .. ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER .. ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,3] . D=eeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,4] . D==eeeeeeeeeER ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2011,33 +2011,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld3r { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 1 16.0 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 1 1.6 0.4 0.0 # CHECK: [36] Code Region - G37 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4303 +# CHECK-NEXT: Total Cycles: 810 # CHECK-NEXT: Total uOps: 4900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.14 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.05 +# CHECK-NEXT: IPC: 0.62 # CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012345 +# CHECK-NEXT: 01234567 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,1] .D=======eeeeeeeeeER. . . . . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,2] . D===============eeeeeeeeeER . . . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,3] . D=======================eeeeeeeeeER . . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,4] . D===============================eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,2] . DeeeeeeeeeER . . ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,3] . DeeeeeeeeeER. . ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeeeeeeeER ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2047,33 +2047,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 3. 1 24.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 4. 1 32.0 0.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 1 16.2 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 3.0 0.0 ld4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 1 1.6 0.8 0.0 # CHECK: [37] Code Region - G38 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4303 +# CHECK-NEXT: Total Cycles: 809 # CHECK-NEXT: Total uOps: 4900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.14 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.06 +# CHECK-NEXT: IPC: 0.62 # CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012345 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,1] .D=======eeeeeeeeeER. . . . . . ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,2] . D===============eeeeeeeeER . . . . ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,3] . D======================eeeeeeeeeER . . ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,4] . D==============================eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER .. ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeeER .. ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,2] . DeeeeeeeeER .. ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,3] . DeeeeeeeeeER.. ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,4] . D=eeeeeeeeeER ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2083,11 +2083,11 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 23.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 4. 1 31.0 0.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 1 15.8 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 1 1.2 0.4 0.0 # CHECK: [38] Code Region - G39 @@ -2165,23 +2165,23 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 4100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.02 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 2.15 +# CHECK-NEXT: IPC: 0.26 # CHECK-NEXT: Block RThroughput: 5.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,1] D========eeeeeeeeER . . . . . . ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeER . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,3] . D======================eeeeeeeeER. . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: [0,4] . D=============================eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK: [0,0] DeeeeeeeeER . .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,1] D========eeeeeeeeER .. ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,2] .D========eeeeeeeeER.. ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,3] . D========eeeeeeeeER. ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: [0,4] . D========eeeeeeeeER ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2192,32 +2192,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 # CHECK-NEXT: 1. 1 9.0 0.0 0.0 ld4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 2. 1 16.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 3. 1 23.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 -# CHECK-NEXT: 4. 1 30.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 -# CHECK-NEXT: 1 15.8 0.2 0.0 +# CHECK-NEXT: 2. 1 9.0 0.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 3. 1 9.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #32 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #16 +# CHECK-NEXT: 1 7.4 0.2 0.0 # CHECK: [41] Code Region - G42 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 659 # CHECK-NEXT: Total uOps: 4300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.07 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.53 +# CHECK-NEXT: IPC: 0.76 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: [0,3] . D=====================eeeeeeeeER. . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: [0,4] . D============================eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 +# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: [0,3] . DeeeeeeeeER . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: [0,4] . D=eeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2227,33 +2227,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #8 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 -# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 -# CHECK-NEXT: 3. 1 22.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 -# CHECK-NEXT: 4. 1 29.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 -# CHECK-NEXT: 1 15.0 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #16 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #4 +# CHECK-NEXT: 3. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #8 +# CHECK-NEXT: 4. 1 2.0 1.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #4 +# CHECK-NEXT: 1 1.2 0.4 0.0 # CHECK: [42] Code Region - G43 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 4003 +# CHECK-NEXT: Total Cycles: 610 # CHECK-NEXT: Total uOps: 4200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.05 -# CHECK-NEXT: IPC: 0.12 +# CHECK-NEXT: uOps Per Cycle: 6.89 +# CHECK-NEXT: IPC: 0.82 # CHECK-NEXT: Block RThroughput: 5.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 012 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,3] . D======================eeeeeeeeER. . . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,4] . D=============================eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeER . ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,2] . DeeeeeeeeER . ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,3] . D=eeeeeeeeER . ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,4] . D==eeeeeeeeER ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2263,33 +2263,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 23.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 4. 1 30.0 0.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 1 15.4 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 0.0 ld4r { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 3.0 1.0 0.0 ld4r { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 1 1.6 0.4 0.0 # CHECK: [43] Code Region - G44 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3603 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 3400 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.94 -# CHECK-NEXT: IPC: 0.14 +# CHECK-NEXT: uOps Per Cycle: 6.69 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345678 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeeeER . . . . . . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: [0,1] .D=======eeeeeeeeER . . . . . ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,2] . D==============eeeeeeeeER . . . ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,3] . D======================eeeeeeER . . ldp s1, s2, [x27], #248 -# CHECK-NEXT: [0,4] . D===========================eeeeeeER ldp d1, d2, [x27], #496 +# CHECK: [0,0] DeeeeeeeeER . ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeeeER. ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,2] . DeeeeeeeeER ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,3] . D=eeeeeeE-R ldp s1, s2, [x27], #248 +# CHECK-NEXT: [0,4] . D=eeeeeeER ldp d1, d2, [x27], #496 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2299,33 +2299,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ld4r { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 8.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 2. 1 15.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 23.0 0.0 0.0 ldp s1, s2, [x27], #248 -# CHECK-NEXT: 4. 1 28.0 0.0 0.0 ldp d1, d2, [x27], #496 -# CHECK-NEXT: 1 15.0 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 ld4r { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 1.0 0.0 0.0 ld4r { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 2.0 0.0 1.0 ldp s1, s2, [x27], #248 +# CHECK-NEXT: 4. 1 2.0 0.0 0.0 ldp d1, d2, [x27], #496 +# CHECK-NEXT: 1 1.4 0.2 0.2 # CHECK: [44] Code Region - G45 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2506 +# CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 2300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.92 -# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: uOps Per Cycle: 4.54 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 2.3 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . ldp q1, q2, [x27], #992 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . ldp s1, s2, [x27, #248]! -# CHECK-NEXT: [0,2] D============eeeeeeER . . ldp d1, d2, [x27, #496]! -# CHECK-NEXT: [0,3] .D=================eeeeeeER . ldp q1, q2, [x27, #992]! -# CHECK-NEXT: [0,4] .D=======================eeeeER ldp w1, w2, [x27], #248 +# CHECK: [0,0] DeeeeeeER .. ldp q1, q2, [x27], #992 +# CHECK-NEXT: [0,1] D=eeeeeeER.. ldp s1, s2, [x27, #248]! +# CHECK-NEXT: [0,2] D==eeeeeeER. ldp d1, d2, [x27, #496]! +# CHECK-NEXT: [0,3] .D==eeeeeeER ldp q1, q2, [x27, #992]! +# CHECK-NEXT: [0,4] .D===eeeeE-R ldp w1, w2, [x27], #248 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2335,33 +2335,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldp q1, q2, [x27], #992 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ldp s1, s2, [x27, #248]! -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ldp d1, d2, [x27, #496]! -# CHECK-NEXT: 3. 1 18.0 0.0 0.0 ldp q1, q2, [x27, #992]! -# CHECK-NEXT: 4. 1 24.0 0.0 0.0 ldp w1, w2, [x27], #248 -# CHECK-NEXT: 1 12.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp s1, s2, [x27, #248]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldp d1, d2, [x27, #496]! +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 ldp q1, q2, [x27, #992]! +# CHECK-NEXT: 4. 1 4.0 0.0 1.0 ldp w1, w2, [x27], #248 +# CHECK-NEXT: 1 2.6 0.2 0.2 # CHECK: [45] Code Region - G46 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1303 +# CHECK-NEXT: Total Cycles: 507 # CHECK-NEXT: Total uOps: 2100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.61 -# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: uOps Per Cycle: 4.14 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . ldp x1, x2, [x27], #496 -# CHECK-NEXT: [0,1] D=eeeeER . . ldp w1, w2, [x27, #248]! -# CHECK-NEXT: [0,2] D==eeeeER . . ldp x1, x2, [x27, #496]! -# CHECK-NEXT: [0,3] D===eeeeeER . ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: [0,4] .D=======eeeeeER ldpsw x1, x2, [x27, #248]! +# CHECK: [0,0] DeeeeER .. ldp x1, x2, [x27], #496 +# CHECK-NEXT: [0,1] D=eeeeER .. ldp w1, w2, [x27, #248]! +# CHECK-NEXT: [0,2] D==eeeeER .. ldp x1, x2, [x27, #496]! +# CHECK-NEXT: [0,3] D===eeeeeER. ldpsw x1, x2, [x27], #248 +# CHECK-NEXT: [0,4] .D===eeeeeER ldpsw x1, x2, [x27, #248]! # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2374,30 +2374,30 @@ # CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldp w1, w2, [x27, #248]! # CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldp x1, x2, [x27, #496]! # CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27], #248 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! -# CHECK-NEXT: 1 3.6 0.2 0.0 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 ldpsw x1, x2, [x27, #248]! +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [46] Code Region - G47 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27], #254 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ldr h1, [x27], #254 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ldr s1, [x27], #254 -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ldr d1, [x27], #254 -# CHECK-NEXT: [0,4] D========================eeeeeeER ldr q1, [x27], #254 +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27], #254 +# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27], #254 +# CHECK-NEXT: [0,3] D===eeeeeeER. ldr d1, [x27], #254 +# CHECK-NEXT: [0,4] D====eeeeeeER ldr q1, [x27], #254 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2407,33 +2407,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27], #254 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ldr h1, [x27], #254 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ldr s1, [x27], #254 -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ldr d1, [x27], #254 -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ldr q1, [x27], #254 -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27], #254 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27], #254 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr d1, [x27], #254 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldr q1, [x27], #254 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [47] Code Region - G48 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.50 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 2.95 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 1.7 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . ldr b1, [x27, #254]! -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . ldr h1, [x27, #254]! -# CHECK-NEXT: [0,2] D============eeeeeeER . . . ldr s1, [x27, #254]! -# CHECK-NEXT: [0,3] D==================eeeeeeER . . ldr d1, [x27, #254]! -# CHECK-NEXT: [0,4] D========================eeeeeeER ldr q1, [x27, #254]! +# CHECK: [0,0] DeeeeeeER . . ldr b1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeeeeeER. . ldr h1, [x27, #254]! +# CHECK-NEXT: [0,2] D==eeeeeeER . ldr s1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eeeeeeER. ldr d1, [x27, #254]! +# CHECK-NEXT: [0,4] D====eeeeeeER ldr q1, [x27, #254]! # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2443,11 +2443,11 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 ldr b1, [x27, #254]! -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 ldr h1, [x27, #254]! -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 ldr s1, [x27, #254]! -# CHECK-NEXT: 3. 1 19.0 0.0 0.0 ldr d1, [x27, #254]! -# CHECK-NEXT: 4. 1 25.0 0.0 0.0 ldr q1, [x27, #254]! -# CHECK-NEXT: 1 13.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldr h1, [x27, #254]! +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldr s1, [x27, #254]! +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 ldr d1, [x27, #254]! +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 ldr q1, [x27, #254]! +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [48] Code Region - G49 @@ -2561,22 +2561,22 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 703 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1200 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.71 -# CHECK-NEXT: IPC: 0.71 +# CHECK-NEXT: uOps Per Cycle: 2.38 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! -# CHECK-NEXT: [0,1] D=eeeeER . ldrsw x1, [x27], #254 -# CHECK-NEXT: [0,2] D==eeeeER. ldrsw x1, [x27, #254]! -# CHECK-NEXT: [0,3] D===eeE-R. st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: [0,4] D=====eeER st1 { v1.2d }, [x27], #16 +# CHECK: [0,0] DeeeeER . ldrsh x1, [x27, #254]! +# CHECK-NEXT: [0,1] D=eeeeER. ldrsw x1, [x27], #254 +# CHECK-NEXT: [0,2] D==eeeeER ldrsw x1, [x27, #254]! +# CHECK-NEXT: [0,3] D===eeE-R st1 { v1.1d }, [x27], #8 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.2d }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2589,30 +2589,29 @@ # CHECK-NEXT: 1. 1 2.0 0.0 0.0 ldrsw x1, [x27], #254 # CHECK-NEXT: 2. 1 3.0 0.0 0.0 ldrsw x1, [x27, #254]! # CHECK-NEXT: 3. 1 4.0 0.0 1.0 st1 { v1.1d }, [x27], #8 -# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st1 { v1.2d }, [x27], #16 -# CHECK-NEXT: 1 3.2 0.2 0.2 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.2 # CHECK: [52] Code Region - G53 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.50 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: [0,2] D====eeER . . st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: [0,3] D======eeER . st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: [0,4] D========eeER st1 { v1.8h }, [x27], #16 +# CHECK: [0,0] DeeER. . st1 { v1.2s }, [x27], #8 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.8h }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2622,33 +2621,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s }, [x27], #8 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.4h }, [x27], #8 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.4s }, [x27], #16 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 st1 { v1.8b }, [x27], #8 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.8h }, [x27], #16 -# CHECK-NEXT: 1 5.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h }, [x27], #8 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4s }, [x27], #16 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.8b }, [x27], #8 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.8h }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [53] Code Region - G54 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.50 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: [0,2] D====eeER . . st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: [0,3] D======eeER . st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: [0,4] D========eeER st1 { v1.4h }, [x27], x28 +# CHECK: [0,0] DeeER. . st1 { v1.16b }, [x27], #16 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.4h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2658,33 +2656,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b }, [x27], #16 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.1d }, [x27], x28 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.2d }, [x27], x28 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 st1 { v1.2s }, [x27], x28 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.4h }, [x27], x28 -# CHECK-NEXT: 1 5.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.1d }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.2d }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.2s }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.4h }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [54] Code Region - G55 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.50 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 2.98 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 2.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: [0,2] D====eeER . . st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: [0,3] D======eeER . st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: [0,4] D========eeER st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK: [0,0] DeeER. . st1 { v1.4s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: [0,4] D====eeER st1 { v1.1d, v2.1d }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2694,33 +2691,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.8b }, [x27], x28 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.8h }, [x27], x28 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 st1 { v1.16b }, [x27], x28 -# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 -# CHECK-NEXT: 1 5.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8b }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.16b }, [x27], x28 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], #16 +# CHECK-NEXT: 1 3.0 0.2 0.0 # CHECK: [55] Code Region - G56 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 1900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.89 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 3.77 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,2] D====eeER . . st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,3] D======eeER . st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,4] .D=======eeER st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,4] .D===eeER st1 { v1.8b, v2.8b }, [x27], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2730,33 +2726,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 1 4.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [56] Code Region - G57 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 2.09 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.17 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,2] D====eeER . . st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: [0,3] .D=====eeER . st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,4] .D=======eeER st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK: [0,0] DeeER. . st1 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeER. st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeER st1 { v1.2s, v2.2s }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2766,33 +2761,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 1 4.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.2 0.0 # CHECK: [57] Code Region - G58 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 504 # CHECK-NEXT: Total uOps: 2100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 2.09 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.17 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Timeline view: -# CHECK-NEXT: 012 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeeER. . . st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: [0,2] D====eeER . . st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,3] D======eeER . st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,4] .D=======eeER st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK: [0,0] DeeER. . st1 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: [0,2] D==eeER . st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,3] D===eeER. st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,4] .D===eeER st1 { v1.16b, v2.16b }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2802,33 +2796,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 2. 1 5.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 4. 1 8.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 1 4.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [58] Code Region - G59 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 2900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 2.89 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.13 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,2] .D===eeER . . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,3] .D=====eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: [0,4] . D======eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK: [0,0] DeeER. . st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,3] .D===eeER. st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2838,33 +2831,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], #24 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 1 4.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: 1 2.6 0.4 0.0 # CHECK: [59] Code Region - G60 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 3.09 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.41 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,2] .D===eeER . . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,3] .D=====eeER . st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: [0,4] . D======eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK: [0,0] DeeER. . st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,3] .D===eeER. st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2874,33 +2866,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 -# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 1 4.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 3. 1 4.0 1.0 0.0 st1 { v1.1d, v2.1d, v3.1d }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 1 2.6 0.4 0.0 # CHECK: [60] Code Region - G61 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 2900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 2.89 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.13 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,2] .D===eeER . . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,3] .D=====eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,4] . D======eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK: [0,0] DeeER. . st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeER . st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,3] .D==eeER . st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2910,33 +2901,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 1 4.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 1 2.4 0.4 0.0 # CHECK: [61] Code Region - G62 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 704 # CHECK-NEXT: Total uOps: 3100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 3.09 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.40 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 6.5 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: [0,2] .D===eeER . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,3] .D=====eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: [0,4] . D======eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK: [0,0] DeeER. . st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: [0,2] .D==eeER . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,3] .D===eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: [0,4] . D====eeER st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2946,33 +2937,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 1 4.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 4. 1 5.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: 1 3.0 0.6 0.0 # CHECK: [62] Code Region - G63 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 804 # CHECK-NEXT: Total uOps: 3700 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 3.69 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.60 +# CHECK-NEXT: IPC: 0.62 # CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,2] .D===eeER . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,3] . D====eeER . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: [0,4] . D======eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK: [0,0] DeeER. .. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,1] D=eeER .. st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,2] .D==eeER .. st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,3] . D==eeER .. st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: [0,4] . D=====eeER st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -2982,33 +2973,32 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 -# CHECK-NEXT: 1 4.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 4. 1 6.0 2.0 0.0 st1 { v1.1d, v2.1d, v3.1d, v4.1d }, [x27], x28 +# CHECK-NEXT: 1 3.0 0.8 0.0 # CHECK: [63] Code Region - G64 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1003 +# CHECK-NEXT: Total Cycles: 703 # CHECK-NEXT: Total uOps: 3300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 3.29 -# CHECK-NEXT: IPC: 0.50 +# CHECK-NEXT: uOps Per Cycle: 4.69 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] D==eeER . . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,2] .D===eeER . . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,3] .D=====eeER . st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,4] . D======eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK: [0,0] DeeER. . st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeER . st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,2] .D==eeER . st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,3] .D===eeER. st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,4] . D===eeER st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3018,33 +3008,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 3.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 4. 1 7.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1 4.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st1 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 1 2.8 0.4 0.0 # CHECK: [64] Code Region - G65 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1603 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.87 -# CHECK-NEXT: IPC: 0.31 +# CHECK-NEXT: uOps Per Cycle: 4.25 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 012345678 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] .D=eeER . . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,2] .D===eeeeER . . st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: [0,3] . D======eeeeER. . st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: [0,4] . D==========eeeeER st1 { v1.b }[0], [x27], x28 +# CHECK: [0,0] DeeER. . . st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeER . . st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,2] .D===eeeeER . st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: [0,3] . D===eeeeER. st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: [0,4] . D====eeeeER st1 { v1.b }[0], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3054,33 +3044,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 2. 1 4.0 0.0 0.0 st1 { v1.b }[0], [x27], #1 -# CHECK-NEXT: 3. 1 7.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 -# CHECK-NEXT: 4. 1 11.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 -# CHECK-NEXT: 1 5.0 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st1 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st1 { v1.b }[0], [x27], #1 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.b }[8], [x27], #1 +# CHECK-NEXT: 4. 1 5.0 0.0 0.0 st1 { v1.b }[0], [x27], x28 +# CHECK-NEXT: 1 3.0 0.6 0.0 # CHECK: [65] Code Region - G66 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D====eeeeER . . . st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: [0,2] D========eeeeER. . . st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: [0,3] D============eeeeER . . st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: [0,4] .D===============eeeeER st1 { v1.h }[4], [x27], x28 +# CHECK: [0,0] DeeeeER . st1 { v1.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: [0,2] D==eeeeER . st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: [0,3] D===eeeeER. st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeER st1 { v1.h }[4], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3090,33 +3080,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 -# CHECK-NEXT: 1 8.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.h }[0], [x27], #2 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.h }[4], [x27], #2 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.h }[0], [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st1 { v1.h }[4], [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [66] Code Region - G67 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 605 # CHECK-NEXT: Total uOps: 2300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.15 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.80 +# CHECK-NEXT: IPC: 0.83 # CHECK-NEXT: Block RThroughput: 6.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: [0,1] D====eeeeER . . . st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: [0,2] D========eeeeER. . . st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: [0,3] D============eeeeER . . st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: [0,4] .D===============eeeeER st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK: [0,0] DeeeeER . st1 { v1.s }[0], [x27], #4 +# CHECK-NEXT: [0,1] D=eeeeER . st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeER . st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: [0,3] D===eeeeER. st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D===eeeeER st2 { v1.2d, v2.2d }, [x27], #32 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3126,33 +3116,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st1 { v1.s }[0], [x27], #4 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 -# CHECK-NEXT: 1 8.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st1 { v1.s }[0], [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st1 { v1.d }[0], [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st1 { v1.d }[0], [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], #32 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [67] Code Region - G68 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 705 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.30 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.69 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 01 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: [0,2] D========eeeeER. . . st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: [0,4] .D===============eeeeER st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK: [0,0] DeeeeER .. st2 { v1.2s, v2.2s }, [x27], #16 +# CHECK-NEXT: [0,1] D=eeeeER .. st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: [0,2] D==eeeeER .. st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: [0,3] .D==eeeeER.. st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: [0,4] .D====eeeeER st2 { v1.8h, v2.8h }, [x27], #32 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3162,33 +3152,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.2s, v2.2s }, [x27], #16 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 -# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 -# CHECK-NEXT: 1 8.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], #16 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], #32 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st2 { v1.8b, v2.8b }, [x27], #16 +# CHECK-NEXT: 4. 1 5.0 1.0 0.0 st2 { v1.8h, v2.8h }, [x27], #32 +# CHECK-NEXT: 1 2.8 0.4 0.0 # CHECK: [68] Code Region - G69 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 805 # CHECK-NEXT: Total uOps: 2900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.45 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.60 +# CHECK-NEXT: IPC: 0.62 # CHECK-NEXT: Block RThroughput: 8.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: [0,4] .D===============eeeeER st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK: [0,0] DeeeeER . . st2 { v1.16b, v2.16b }, [x27], #32 +# CHECK-NEXT: [0,1] D=eeeeER . . st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: [0,2] .D===eeeeER . st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: [0,3] .D====eeeeER. st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: [0,4] .D=====eeeeER st2 { v1.4s, v2.4s }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3198,33 +3188,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.16b, v2.16b }, [x27], #32 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 -# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 -# CHECK-NEXT: 1 8.4 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.2d, v2.2d }, [x27], x28 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st2 { v1.2s, v2.2s }, [x27], x28 +# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st2 { v1.4h, v2.4h }, [x27], x28 +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st2 { v1.4s, v2.4s }, [x27], x28 +# CHECK-NEXT: 1 3.6 0.6 0.0 # CHECK: [69] Code Region - G70 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 706 # CHECK-NEXT: Total uOps: 2600 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.30 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.68 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: [0,2] .D=======eeeeER. . . st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: [0,3] .D===========eeeeER . . st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: [0,4] .D===============eeeeER st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK: [0,0] DeeeeER . . st2 { v1.8b, v2.8b }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . . st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: [0,2] .D=eeeeER . . st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: [0,3] .D====eeeeER. st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: [0,4] .D=====eeeeER st2 { v1.b, v2.b }[8], [x27], #2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3234,33 +3224,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.8b, v2.8b }, [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 -# CHECK-NEXT: 2. 1 8.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 -# CHECK-NEXT: 3. 1 12.0 0.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 -# CHECK-NEXT: 1 8.4 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.8h, v2.8h }, [x27], x28 +# CHECK-NEXT: 2. 1 2.0 0.0 0.0 st2 { v1.16b, v2.16b }, [x27], x28 +# CHECK-NEXT: 3. 1 5.0 2.0 0.0 st2 { v1.b, v2.b }[0], [x27], #2 +# CHECK-NEXT: 4. 1 6.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], #2 +# CHECK-NEXT: 1 3.2 0.6 0.0 # CHECK: [70] Code Region - G71 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: [0,2] D========eeeeER. . . st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: [0,3] D============eeeeER . . st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: [0,4] .D===============eeeeER st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK: [0,0] DeeeeER . st2 { v1.b, v2.b }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: [0,2] D==eeeeER . st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: [0,3] D===eeeeER. st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: [0,4] .D===eeeeER st2 { v1.h, v2.h }[0], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3270,33 +3260,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.b, v2.b }[0], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 -# CHECK-NEXT: 1 8.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.b, v2.b }[8], [x27], x28 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], #4 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[4], [x27], #4 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.h, v2.h }[0], [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [71] Code Region - G72 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 506 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 0.25 +# CHECK-NEXT: uOps Per Cycle: 3.95 +# CHECK-NEXT: IPC: 0.99 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . . . st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: [0,1] D====eeeeER . . . st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: [0,2] D========eeeeER. . . st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: [0,3] D============eeeeER . . st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: [0,4] .D===============eeeeER st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK: [0,0] DeeeeER . st2 { v1.h, v2.h }[4], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeER . st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: [0,2] D==eeeeER . st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: [0,3] D===eeeeER. st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: [0,4] .D===eeeeER st2 { v1.d, v2.d }[0], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3306,33 +3296,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2 { v1.h, v2.h }[4], [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 -# CHECK-NEXT: 2. 1 9.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 -# CHECK-NEXT: 3. 1 13.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 -# CHECK-NEXT: 4. 1 16.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 -# CHECK-NEXT: 1 8.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], #8 +# CHECK-NEXT: 2. 1 3.0 0.0 0.0 st2 { v1.s, v2.s }[0], [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], #16 +# CHECK-NEXT: 4. 1 4.0 0.0 0.0 st2 { v1.d, v2.d }[0], [x27], x28 +# CHECK-NEXT: 1 2.8 0.2 0.0 # CHECK: [72] Code Region - G73 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 1803 +# CHECK-NEXT: Total Cycles: 807 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.66 -# CHECK-NEXT: IPC: 0.28 +# CHECK-NEXT: uOps Per Cycle: 3.72 +# CHECK-NEXT: IPC: 0.62 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . . . st2g x26, [x27], #4064 -# CHECK-NEXT: [0,1] D=eER. . . . st2g x26, [x27, #4064]! -# CHECK-NEXT: [0,2] D==eeeeeeER . . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: [0,3] .D=======eeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: [0,4] .D============eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK: [0,0] DeER . . . st2g x26, [x27], #4064 +# CHECK-NEXT: [0,1] D=eER. . . st2g x26, [x27, #4064]! +# CHECK-NEXT: [0,2] D==eeeeeeER . st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 +# CHECK-NEXT: [0,3] .D==eeeeeER . st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: [0,4] .D======eeeeeER st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3344,31 +3334,31 @@ # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st2g x26, [x27], #4064 # CHECK-NEXT: 1. 1 2.0 0.0 0.0 st2g x26, [x27, #4064]! # CHECK-NEXT: 2. 1 3.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], #48 -# CHECK-NEXT: 3. 1 8.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 -# CHECK-NEXT: 4. 1 13.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 -# CHECK-NEXT: 1 5.4 0.2 0.0 +# CHECK-NEXT: 3. 1 3.0 0.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], #24 +# CHECK-NEXT: 4. 1 7.0 3.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], #24 +# CHECK-NEXT: 1 3.2 0.8 0.0 # CHECK: [73] Code Region - G74 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2903 +# CHECK-NEXT: Total Cycles: 1405 # CHECK-NEXT: Total uOps: 4700 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.62 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 3.35 +# CHECK-NEXT: IPC: 0.36 # CHECK-NEXT: Block RThroughput: 14.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . .. st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: [0,1] .D=====eeeeeER . . . .. st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: [0,2] . D=========eeeeeeER. . .. st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: [0,3] . D==============eeeeeeER .. st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: [0,4] . D===================eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK: [0,0] DeeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 +# CHECK-NEXT: [0,1] .DeeeeeER . . . st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: [0,2] . D===eeeeeeER . . st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: [0,3] . D===eeeeeeER. . st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: [0,4] . D=======eeeeeeER st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3378,33 +3368,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], #48 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 -# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 -# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 -# CHECK-NEXT: 4. 1 20.0 0.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 -# CHECK-NEXT: 1 10.4 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], #24 +# CHECK-NEXT: 2. 1 4.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], #48 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], #48 +# CHECK-NEXT: 4. 1 8.0 4.0 0.0 st3 { v1.2d, v2.2d, v3.2d }, [x27], x28 +# CHECK-NEXT: 1 3.6 1.6 0.0 # CHECK: [74] Code Region - G75 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2703 +# CHECK-NEXT: Total Cycles: 1206 # CHECK-NEXT: Total uOps: 4100 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.52 -# CHECK-NEXT: IPC: 0.18 +# CHECK-NEXT: uOps Per Cycle: 3.40 +# CHECK-NEXT: IPC: 0.41 # CHECK-NEXT: Block RThroughput: 12.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 01234567 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: [0,2] .D=========eeeeeeER . . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: [0,3] . D==============eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: [0,4] . D==================eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK: [0,0] DeeeeeER . . . st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeER . . . st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: [0,2] .D===eeeeeeER . . st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: [0,3] . D===eeeeeER . . st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: [0,4] . D======eeeeeeER st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3414,33 +3404,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.2s, v2.2s, v3.2s }, [x27], x28 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 -# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 -# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 -# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 -# CHECK-NEXT: 1 10.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st3 { v1.4h, v2.4h, v3.4h }, [x27], x28 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st3 { v1.4s, v2.4s, v3.4s }, [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st3 { v1.8b, v2.8b, v3.8b }, [x27], x28 +# CHECK-NEXT: 4. 1 7.0 3.0 0.0 st3 { v1.8h, v2.8h, v3.8h }, [x27], x28 +# CHECK-NEXT: 1 3.6 1.2 0.0 # CHECK: [75] Code Region - G76 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2603 +# CHECK-NEXT: Total Cycles: 1106 # CHECK-NEXT: Total uOps: 3800 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.46 -# CHECK-NEXT: IPC: 0.19 +# CHECK-NEXT: uOps Per Cycle: 3.44 +# CHECK-NEXT: IPC: 0.45 # CHECK-NEXT: Block RThroughput: 11.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345678 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: [0,1] .D=====eeeeeER . . . . st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: [0,2] .D==========eeeeeER . . . st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: [0,3] . D==============eeeeeER . . st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: [0,4] . D===================eeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK: [0,0] DeeeeeeER . .. st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeER . .. st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: [0,2] .D====eeeeeER .. st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: [0,3] . D====eeeeeER .. st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: [0,4] . D=======eeeeeER st3 { v1.b, v2.b, v3.b }[8], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3450,33 +3440,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.16b, v2.16b, v3.16b }, [x27], x28 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 -# CHECK-NEXT: 2. 1 11.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 -# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 -# CHECK-NEXT: 4. 1 20.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 -# CHECK-NEXT: 1 10.6 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], #3 +# CHECK-NEXT: 2. 1 5.0 3.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], #3 +# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st3 { v1.b, v2.b, v3.b }[0], [x27], x28 +# CHECK-NEXT: 4. 1 8.0 2.0 0.0 st3 { v1.b, v2.b, v3.b }[8], [x27], x28 +# CHECK-NEXT: 1 4.0 1.2 0.0 # CHECK: [76] Code Region - G77 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2503 +# CHECK-NEXT: Total Cycles: 1005 # CHECK-NEXT: Total uOps: 3500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.40 -# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: uOps Per Cycle: 3.48 +# CHECK-NEXT: IPC: 0.50 # CHECK-NEXT: Block RThroughput: 10.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01234567 +# CHECK-NEXT: 012345 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: [0,3] .D==============eeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: [0,4] . D==================eeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK: [0,0] DeeeeeER . . st3 { v1.h, v2.h, v3.h }[0], [x27], #6 +# CHECK-NEXT: [0,1] D=eeeeeER . . st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: [0,2] .D===eeeeeER . st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: [0,3] .D====eeeeeER . st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: [0,4] . D======eeeeeER st3 { v1.s, v2.s, v3.s }[0], [x27], #12 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3486,33 +3476,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], #6 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 -# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 -# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 -# CHECK-NEXT: 4. 1 19.0 0.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 -# CHECK-NEXT: 1 10.2 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], #6 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st3 { v1.h, v2.h, v3.h }[0], [x27], x28 +# CHECK-NEXT: 3. 1 5.0 0.0 0.0 st3 { v1.h, v2.h, v3.h }[4], [x27], x28 +# CHECK-NEXT: 4. 1 7.0 2.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], #12 +# CHECK-NEXT: 1 3.8 1.0 0.0 # CHECK: [77] Code Region - G78 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2603 +# CHECK-NEXT: Total Cycles: 1304 # CHECK-NEXT: Total uOps: 4300 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.65 -# CHECK-NEXT: IPC: 0.19 +# CHECK-NEXT: uOps Per Cycle: 3.30 +# CHECK-NEXT: IPC: 0.38 # CHECK-NEXT: Block RThroughput: 13.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345678 +# CHECK-NEXT: 0123456 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . . . . st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: [0,1] D=====eeeeeER . . . . st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: [0,2] .D=========eeeeeER . . . st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: [0,3] . D=============eeeeeER . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: [0,4] . D=================eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK: [0,0] DeeeeeER . .. st3 { v1.s, v2.s, v3.s }[0], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeER . .. st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: [0,2] .D===eeeeeER .. st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: [0,3] . D===eeeeeER .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: [0,4] . D=====eeeeeeER st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3522,33 +3512,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st3 { v1.s, v2.s, v3.s }[0], [x27], x28 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 -# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 -# CHECK-NEXT: 3. 1 14.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 -# CHECK-NEXT: 4. 1 18.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 -# CHECK-NEXT: 1 9.8 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], #24 +# CHECK-NEXT: 2. 1 4.0 2.0 0.0 st3 { v1.d, v2.d, v3.d }[0], [x27], x28 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], #64 +# CHECK-NEXT: 4. 1 6.0 2.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], #32 +# CHECK-NEXT: 1 3.4 1.0 0.0 # CHECK: [78] Code Region - G79 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3303 +# CHECK-NEXT: Total Cycles: 2399 # CHECK-NEXT: Total uOps: 6900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 2.09 -# CHECK-NEXT: IPC: 0.15 +# CHECK-NEXT: uOps Per Cycle: 2.88 +# CHECK-NEXT: IPC: 0.21 # CHECK-NEXT: Block RThroughput: 24.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012345 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 012 -# CHECK: [0,0] DeeeeeeER . . . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: [0,1] .D=====eeeeeeeER . . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: [0,2] . D===========eeeeeeER . . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: [0,3] . D================eeeeeeeER . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: [0,4] . D=====================eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 +# CHECK-NEXT: [0,1] .DeeeeeeeER . . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: [0,2] . D====eeeeeeER. . . st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: [0,3] . D=========eeeeeeeER. st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: [0,4] . D========eeeeeeeER st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3558,33 +3548,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], #32 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 -# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 -# CHECK-NEXT: 3. 1 17.0 0.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 -# CHECK-NEXT: 4. 1 22.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 -# CHECK-NEXT: 1 11.6 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], #64 +# CHECK-NEXT: 2. 1 5.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], #32 +# CHECK-NEXT: 3. 1 10.0 5.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], #64 +# CHECK-NEXT: 4. 1 9.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], #64 +# CHECK-NEXT: 1 5.2 2.0 0.0 # CHECK: [79] Code Region - G80 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 1903 # CHECK-NEXT: Total uOps: 5700 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.90 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 3.00 +# CHECK-NEXT: IPC: 0.26 # CHECK-NEXT: Block RThroughput: 19.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 01 -# CHECK: [0,0] DeeeeeER . . . . . . st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: [0,1] .D====eeeeeeER . . . . . st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: [0,2] . D=========eeeeeeER. . . . st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: [0,3] . D==============eeeeeeeER . . st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: [0,4] . D====================eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK: [0,0] DeeeeeER . . .. st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 +# CHECK-NEXT: [0,1] .DeeeeeeER. . .. st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: [0,2] . D=====eeeeeeER .. st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: [0,3] . D=====eeeeeeeER .. st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: [0,4] . D=========eeeeeeER st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3594,33 +3584,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.2d, v2.2d, v3.2d, v4.2d }, [x27], x28 -# CHECK-NEXT: 1. 1 5.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 -# CHECK-NEXT: 2. 1 10.0 0.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 -# CHECK-NEXT: 3. 1 15.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 -# CHECK-NEXT: 4. 1 21.0 0.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 -# CHECK-NEXT: 1 10.4 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 0.0 0.0 st4 { v1.2s, v2.2s, v3.2s, v4.2s }, [x27], x28 +# CHECK-NEXT: 2. 1 6.0 5.0 0.0 st4 { v1.4h, v2.4h, v3.4h, v4.4h }, [x27], x28 +# CHECK-NEXT: 3. 1 6.0 0.0 0.0 st4 { v1.4s, v2.4s, v3.4s, v4.4s }, [x27], x28 +# CHECK-NEXT: 4. 1 10.0 4.0 0.0 st4 { v1.8b, v2.8b, v3.8b, v4.8b }, [x27], x28 +# CHECK-NEXT: 1 4.8 2.0 0.0 # CHECK: [80] Code Region - G81 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3203 +# CHECK-NEXT: Total Cycles: 1658 # CHECK-NEXT: Total uOps: 4900 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.53 -# CHECK-NEXT: IPC: 0.16 +# CHECK-NEXT: uOps Per Cycle: 2.96 +# CHECK-NEXT: IPC: 0.30 # CHECK-NEXT: Block RThroughput: 16.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01234 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 +# CHECK-NEXT: Index 0123456789 0123 -# CHECK: [0,0] DeeeeeeeER. . . . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: [0,1] . D=====eeeeeeeER . . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: [0,2] . D===========eeeeeeER . . . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: [0,3] . D=================eeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: [0,4] . D=======================eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK: [0,0] DeeeeeeeER. . . . st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 +# CHECK-NEXT: [0,1] . DeeeeeeeER . . . st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: [0,2] . D=========eeeeeeER . st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: [0,3] . D===========eeeeeeER. st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: [0,4] . D============eeeeeeER st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3630,33 +3620,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.8h, v2.8h, v3.8h, v4.8h }, [x27], x28 -# CHECK-NEXT: 1. 1 6.0 0.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 -# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 -# CHECK-NEXT: 3. 1 18.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 -# CHECK-NEXT: 4. 1 24.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 -# CHECK-NEXT: 1 12.2 0.2 0.0 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 st4 { v1.16b, v2.16b, v3.16b, v4.16b }, [x27], x28 +# CHECK-NEXT: 2. 1 10.0 9.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], #4 +# CHECK-NEXT: 3. 1 12.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], #4 +# CHECK-NEXT: 4. 1 13.0 0.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[0], [x27], x28 +# CHECK-NEXT: 1 7.4 2.4 0.0 # CHECK: [81] Code Region - G82 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 757 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 0.83 -# CHECK-NEXT: IPC: 0.17 +# CHECK-NEXT: uOps Per Cycle: 3.30 +# CHECK-NEXT: IPC: 0.66 # CHECK-NEXT: Block RThroughput: 7.5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 01234 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: [0,2] D============eeeeeeER . . . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: [0,3] .D=================eeeeeeER . . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: [0,4] .D=======================eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK: [0,0] DeeeeeeER . . st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 +# CHECK-NEXT: [0,1] D=eeeeeeER. . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: [0,2] D===eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: [0,3] .D===eeeeeeER . st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: [0,4] .D=====eeeeeeER st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3666,33 +3656,33 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.b, v2.b, v3.b, v4.b }[8], [x27], x28 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 -# CHECK-NEXT: 2. 1 13.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 -# CHECK-NEXT: 3. 1 18.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 -# CHECK-NEXT: 4. 1 24.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 -# CHECK-NEXT: 1 12.6 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], #8 +# CHECK-NEXT: 2. 1 4.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], #8 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[0], [x27], x28 +# CHECK-NEXT: 4. 1 6.0 1.0 0.0 st4 { v1.h, v2.h, v3.h, v4.h }[4], [x27], x28 +# CHECK-NEXT: 1 3.4 0.6 0.0 # CHECK: [82] Code Region - G83 # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2103 +# CHECK-NEXT: Total Cycles: 704 # CHECK-NEXT: Total uOps: 2700 # CHECK: Dispatch Width: 16 -# CHECK-NEXT: uOps Per Cycle: 1.28 -# CHECK-NEXT: IPC: 0.24 +# CHECK-NEXT: uOps Per Cycle: 3.84 +# CHECK-NEXT: IPC: 0.71 # CHECK-NEXT: Block RThroughput: 7.0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123 +# CHECK-NEXT: 0 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: [0,1] D======eeeeeeER. . . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: [0,2] .D===========eeeeER . . st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: [0,3] .D===============eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: [0,4] . D==================eER stg x26, [x27], #4064 +# CHECK: [0,0] DeeeeeeER . st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 +# CHECK-NEXT: [0,1] D=eeeeeeER. st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: [0,2] .D==eeeeER. st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: [0,3] .D===eeeeER st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: [0,4] . D===eE--R stg x26, [x27], #4064 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -3702,11 +3692,11 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], #16 -# CHECK-NEXT: 1. 1 7.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 -# CHECK-NEXT: 2. 1 12.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 -# CHECK-NEXT: 3. 1 16.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 -# CHECK-NEXT: 4. 1 19.0 0.0 0.0 stg x26, [x27], #4064 -# CHECK-NEXT: 1 11.0 0.2 0.0 +# CHECK-NEXT: 1. 1 2.0 0.0 0.0 st4 { v1.s, v2.s, v3.s, v4.s }[0], [x27], x28 +# CHECK-NEXT: 2. 1 3.0 1.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], #32 +# CHECK-NEXT: 3. 1 4.0 0.0 0.0 st4 { v1.d, v2.d, v3.d, v4.d }[0], [x27], x28 +# CHECK-NEXT: 4. 1 4.0 0.0 2.0 stg x26, [x27], #4064 +# CHECK-NEXT: 1 2.8 0.4 0.4 # CHECK: [83] Code Region - G84