diff --git a/llvm/include/llvm/MCA/CodeEmitter.h b/llvm/include/llvm/MCA/CodeEmitter.h --- a/llvm/include/llvm/MCA/CodeEmitter.h +++ b/llvm/include/llvm/MCA/CodeEmitter.h @@ -60,6 +60,8 @@ : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S), Encodings(S.size()) {} + const MCInst &getInst(unsigned MCID) const { return Sequence[MCID]; } + StringRef getEncoding(unsigned MCID) { EncodingInfo EI = getOrCreateEncodingInfo(MCID); return StringRef(&Code[EI.first], EI.second); diff --git a/llvm/include/llvm/MCA/HardwareUnits/InstructionBuffer.h b/llvm/include/llvm/MCA/HardwareUnits/InstructionBuffer.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MCA/HardwareUnits/InstructionBuffer.h @@ -0,0 +1,83 @@ +//===---------------------- InstructionBuffer.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file simulates the hardware responsible for queueing instruction bytes +/// between fetch unit and decode unit. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_INSTRUCTION_BUFFER_H +#define LLVM_MCA_INSTRUCTION_BUFFER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MCA/HardwareUnits/HardwareUnit.h" +#include + +namespace llvm { +namespace mca { + +class InstructionBuffer final : public HardwareUnit { +public: + struct Buffer { + int NumBytesRemaining; + bool FetchCompleted; + + Buffer(); + + // Can we read from this buffer yet? + bool isReady() const; + // Is this buffer actually alive, or we just didn't GC it yet? 
+ bool isDepleted() const; + }; + +private: + std::vector Buffers; + unsigned NumKnownDepletedBuffers; + + // Per cycle + MutableArrayRef BuffersAvaliableToDecoderThisCycle; + unsigned BytesRemaining; + unsigned FetchesPerformed; + + static constexpr unsigned MaxBuffersTotal = 16U; + static constexpr unsigned BytesPerBuffer = 16U; + static constexpr unsigned BytesFetchedPerCycle = 32U; + static constexpr unsigned BuffersFilledPerCycle = + BytesFetchedPerCycle / BytesPerBuffer; + static_assert(BuffersFilledPerCycle * BytesPerBuffer == BytesFetchedPerCycle, + "Fetch amount should be a multiple of buffer size"); + + static constexpr unsigned NumBuffersDecoderCanAccessPerCycle = 2U; + static constexpr unsigned NumBytesDecoderCanAccessPerCycle = + NumBuffersDecoderCanAccessPerCycle * BytesPerBuffer; + static constexpr unsigned MaxFetchesPerCycle = 4U; + + bool canAccomodateInstructionFetch() const; + void enqueueInstructionFetch(); + unsigned getNumDepletedBuffers() const; + void recalculateNumDepletedBuffers(); + + unsigned getNumOccupiedBuffers() const; + unsigned getNumVacantBuffers() const; + +public: + InstructionBuffer(); + + void cycleStart(); + + unsigned getNumBytesRemaining() const; + void consumeNumBytes(unsigned NumBytesToConsume); + + void cycleEnd(); +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_INSTRUCTION_BUFFER_H diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h --- a/llvm/include/llvm/MCA/InstrBuilder.h +++ b/llvm/include/llvm/MCA/InstrBuilder.h @@ -18,6 +18,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/MCA/Instruction.h" #include "llvm/MCA/Support.h" #include "llvm/Support/Error.h" @@ -48,8 +49,10 @@ bool FirstCallInst; bool FirstReturnInst; - Expected createInstrDescImpl(const MCInst &MCI); - Expected getOrCreateInstrDesc(const MCInst &MCI); + Expected + 
createInstrDescImpl(CodeEmitter &CE, unsigned MCID, const MCInst &MCI); + Expected + getOrCreateInstrDesc(CodeEmitter &CE, unsigned MCID, const MCInst &MCI); InstrBuilder(const InstrBuilder &) = delete; InstrBuilder &operator=(const InstrBuilder &) = delete; @@ -68,7 +71,8 @@ FirstReturnInst = true; } - Expected> createInstruction(const MCInst &MCI); + Expected> createInstruction(CodeEmitter &CE, + unsigned MCID); }; } // namespace mca } // namespace llvm diff --git a/llvm/include/llvm/MCA/Instruction.h b/llvm/include/llvm/MCA/Instruction.h --- a/llvm/include/llvm/MCA/Instruction.h +++ b/llvm/include/llvm/MCA/Instruction.h @@ -362,6 +362,9 @@ // A bitmask of used processor resource groups. uint64_t UsedProcResGroups; + // How many bytes does the encoding of this instruction occupy. + unsigned EncodingByteLength; + // What latency does this instruction incur. unsigned MaxLatency; // Number of MicroOps for this instruction. unsigned NumMicroOps; @@ -417,6 +420,7 @@ const ArrayRef getUses() const { return Uses; } const InstrDesc &getDesc() const { return Desc; } + unsigned getEncodingByteLength() const { return Desc.EncodingByteLength; } unsigned getLatency() const { return Desc.MaxLatency; } unsigned getNumMicroOps() const { return Desc.NumMicroOps; } diff --git a/llvm/include/llvm/MCA/Stages/DecodeStage.h b/llvm/include/llvm/MCA/Stages/DecodeStage.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MCA/Stages/DecodeStage.h @@ -0,0 +1,87 @@ +//===---------------------- DecodeStage.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage that implements instruction decoding +/// into micro-ops.
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_DECODE_STAGE_H +#define LLVM_MCA_DECODE_STAGE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/MCA/Instruction.h" +#include "llvm/MCA/Stages/Stage.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace mca { + +class InstructionBuffer; +class SourceMgr; + +/// A stage that simulates an instruction decoder. +class DecodeStage : public Stage { + InstructionBuffer &Buffer; + SourceMgr &SM; + + SmallVector, 16> Instructions; + unsigned NumRetired; + + InstRef CurrentInstruction; + + // Updates the program counter, and sets 'CurrentInstruction'. + void getNextInstructionFromSourceManager(); + + struct MicroOpEngine { + InstRef IR; + int MicroOpsLeftToGenerate; + } MicroOpEngine; + SmallVector Decoders; + + DecodeStage(const DecodeStage &Other) = delete; + DecodeStage &operator=(const DecodeStage &Other) = delete; + + bool microOpDecodersHaveWorkToComplete() const; + + // Is this instruction microcoded? + static bool IsMicroCoded(const InstRef &IR); + + InstRef peekNextInstructionFromBuffer() const; + InstRef getNextInstructionFromBuffer(); + + // Would we be able to place this decoded (from its byte encoding) + // instruction onto the micro-op decoders? + bool canEnqueueForMicroOpDecoding(const InstRef &IR) const; + + Error performMicroOpDecoding(); + +public: + DecodeStage(InstructionBuffer &Buffer, SourceMgr &SM); + + Error cycleStart() override; + + // Are there any instructions currently being decoded? + bool hasWorkToComplete() const override; + + // Would the decoder be able to start decoding the next instruction? + bool isAvailable(const InstRef & /*unused*/) const override; + + // Start decoding the next instruction. + Error execute(InstRef & /*unused*/) override; + + // Actually generate microcodes, if any.
+ Error cycleEnd() override; +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_DECODE_STAGE_H diff --git a/llvm/include/llvm/MCA/Stages/EntryStage.h b/llvm/include/llvm/MCA/Stages/EntryStage.h --- a/llvm/include/llvm/MCA/Stages/EntryStage.h +++ b/llvm/include/llvm/MCA/Stages/EntryStage.h @@ -38,9 +38,9 @@ public: EntryStage(SourceMgr &SM) : CurrentInstruction(), SM(SM), NumRetired(0) { } - bool isAvailable(const InstRef &IR) const override; + bool isAvailable(const InstRef & /*unused*/) const override; bool hasWorkToComplete() const override; - Error execute(InstRef &IR) override; + Error execute(InstRef & /*unused*/) override; Error cycleStart() override; Error cycleEnd() override; }; diff --git a/llvm/include/llvm/MCA/Stages/FetchStage.h b/llvm/include/llvm/MCA/Stages/FetchStage.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/MCA/Stages/FetchStage.h @@ -0,0 +1,49 @@ +//===----------------- FetchStage.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines a stage that keeps the instruction byte buffer filled. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MCA_FETCH_STAGE_H +#define LLVM_MCA_FETCH_STAGE_H + +#include "llvm/MCA/Stages/Stage.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace mca { + +class InstRef; +class InstructionBuffer; + +class FetchStage final : public Stage { + InstructionBuffer &Buffer; + + FetchStage(const FetchStage &Other) = delete; + FetchStage &operator=(const FetchStage &Other) = delete; + +public: + FetchStage(InstructionBuffer &Buffer); + + Error cycleStart() override; + + bool hasWorkToComplete() const override; + + bool isAvailable(const InstRef & /*unused*/) const override; + + Error execute(InstRef & /*unused*/) override; + + Error cycleEnd() override; +}; + +} // namespace mca +} // namespace llvm + +#endif // LLVM_MCA_FETCH_STAGE_H diff --git a/llvm/lib/MCA/CMakeLists.txt b/llvm/lib/MCA/CMakeLists.txt --- a/llvm/lib/MCA/CMakeLists.txt +++ b/llvm/lib/MCA/CMakeLists.txt @@ -3,6 +3,7 @@ Context.cpp HWEventListener.cpp HardwareUnits/HardwareUnit.cpp + HardwareUnits/InstructionBuffer.cpp HardwareUnits/LSUnit.cpp HardwareUnits/RegisterFile.cpp HardwareUnits/ResourceManager.cpp @@ -11,9 +12,11 @@ InstrBuilder.cpp Instruction.cpp Pipeline.cpp + Stages/DecodeStage.cpp Stages/DispatchStage.cpp Stages/EntryStage.cpp Stages/ExecuteStage.cpp + Stages/FetchStage.cpp Stages/InstructionTables.cpp Stages/MicroOpQueueStage.cpp Stages/RetireStage.cpp diff --git a/llvm/lib/MCA/Context.cpp b/llvm/lib/MCA/Context.cpp --- a/llvm/lib/MCA/Context.cpp +++ b/llvm/lib/MCA/Context.cpp @@ -15,12 +15,15 @@ //===----------------------------------------------------------------------===// #include "llvm/MCA/Context.h" +#include "llvm/MCA/HardwareUnits/InstructionBuffer.h" #include "llvm/MCA/HardwareUnits/RegisterFile.h"
#include "llvm/MCA/HardwareUnits/RetireControlUnit.h" #include "llvm/MCA/HardwareUnits/Scheduler.h" +#include "llvm/MCA/Stages/DecodeStage.h" #include "llvm/MCA/Stages/DispatchStage.h" #include "llvm/MCA/Stages/EntryStage.h" #include "llvm/MCA/Stages/ExecuteStage.h" +#include "llvm/MCA/Stages/FetchStage.h" #include "llvm/MCA/Stages/MicroOpQueueStage.h" #include "llvm/MCA/Stages/RetireStage.h" @@ -32,6 +35,7 @@ const MCSchedModel &SM = STI.getSchedModel(); // Create the hardware units defining the backend. + auto IB = std::make_unique(); auto RCU = std::make_unique(SM); auto PRF = std::make_unique(SM, MRI, Opts.RegisterFileSize); auto LSU = std::make_unique(SM, Opts.LoadQueueSize, @@ -39,7 +43,9 @@ auto HWS = std::make_unique(SM, *LSU); // Create the pipeline stages. - auto Fetch = std::make_unique(SrcMgr); + auto Fetch = std::make_unique(*IB); + auto Decode = std::make_unique(*IB, SrcMgr); + auto Entry = std::make_unique(SrcMgr); auto Dispatch = std::make_unique(STI, MRI, Opts.DispatchWidth, *RCU, *PRF); auto Execute = @@ -47,6 +53,10 @@ auto Retire = std::make_unique(*RCU, *PRF, *LSU); // Pass the ownership of all the hardware units to this Context. + // FIXME: parametrize and enable globally. + if (STI.getTargetTriple().getArch() == Triple::ArchType::x86_64 && + STI.getCPU() == "bdver2") + addHardwareUnit(std::move(IB)); addHardwareUnit(std::move(RCU)); addHardwareUnit(std::move(PRF)); addHardwareUnit(std::move(LSU)); @@ -54,7 +64,13 @@ // Build the pipeline. auto StagePipeline = std::make_unique(); - StagePipeline->appendStage(std::move(Fetch)); + // FIXME: parametrize and enable globally. 
+ if (STI.getTargetTriple().getArch() == Triple::ArchType::x86_64 && + STI.getCPU() == "bdver2") { + StagePipeline->appendStage(std::move(Fetch)); + StagePipeline->appendStage(std::move(Decode)); + } else + StagePipeline->appendStage(std::move(Entry)); if (Opts.MicroOpQueueSize) StagePipeline->appendStage(std::make_unique( Opts.MicroOpQueueSize, Opts.DecodersThroughput)); diff --git a/llvm/lib/MCA/HardwareUnits/InstructionBuffer.cpp b/llvm/lib/MCA/HardwareUnits/InstructionBuffer.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/MCA/HardwareUnits/InstructionBuffer.cpp @@ -0,0 +1,184 @@ +//===------------------ InstructionBuffer.cpp -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file implements the InstructionBuffer hardware unit, which tracks +/// the instruction byte buffers that are fetched into and then consumed. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/HardwareUnits/InstructionBuffer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" +#include +#include +#include +#include + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +const unsigned InstructionBuffer::MaxBuffersTotal; +const unsigned InstructionBuffer::BytesPerBuffer; +const unsigned InstructionBuffer::BytesFetchedPerCycle; +const unsigned InstructionBuffer::BuffersFilledPerCycle; +const unsigned InstructionBuffer::NumBuffersDecoderCanAccessPerCycle; +const unsigned InstructionBuffer::NumBytesDecoderCanAccessPerCycle; + +InstructionBuffer::InstructionBuffer() : NumKnownDepletedBuffers(0) { + // Really overallocate to avoid allocations at all.
+ Buffers.reserve(4 * MaxBuffersTotal); +}; + +InstructionBuffer::Buffer::Buffer() + : NumBytesRemaining(-1), FetchCompleted(false) {} + +bool InstructionBuffer::Buffer::isReady() const { return FetchCompleted; }; + +bool InstructionBuffer::Buffer::isDepleted() const { + if (!isReady()) // Can't deplete a buffer that hasn't fetched yet. + return false; + assert(NumBytesRemaining >= 0 && + "Shouldn't ever over-consume bytes in the buffer."); + return NumBytesRemaining == 0; +}; + +unsigned InstructionBuffer::getNumDepletedBuffers() const { +#ifndef NDEBUG + llvm::for_each(ArrayRef(Buffers).take_front(NumKnownDepletedBuffers), + [](const Buffer &Buf) { + assert(Buf.isDepleted() && + "All buffers that we counted as depleted should " + "actually be depleted."); + }); +#endif + + // Ignore known-depleted buffers. + auto Range = + make_range(Buffers.begin() + NumKnownDepletedBuffers, Buffers.end()); + // And find the first non-depleted one. + auto It = find_if(Range, [](const Buffer &Buf) { return !Buf.isDepleted(); }); + // So how many depleted buffers are there in total?
+ return std::distance(Buffers.begin(), It); +} + +void InstructionBuffer::recalculateNumDepletedBuffers() { + NumKnownDepletedBuffers = getNumDepletedBuffers(); +} + +unsigned InstructionBuffer::getNumOccupiedBuffers() const { + unsigned NumOccupiedBuffers = Buffers.size() - getNumDepletedBuffers(); + assert(NumOccupiedBuffers <= MaxBuffersTotal && + "There is a hard limit on the buffer count."); + return NumOccupiedBuffers; +} + +unsigned InstructionBuffer::getNumVacantBuffers() const { + int NumVacantBuffers = (int)MaxBuffersTotal - getNumOccupiedBuffers(); + assert(NumVacantBuffers >= 0 && "Can't have negative count of empty buffers"); + return NumVacantBuffers; +} + +bool InstructionBuffer::canAccomodateInstructionFetch() const { + return getNumVacantBuffers() >= BuffersFilledPerCycle; +} + +void InstructionBuffer::enqueueInstructionFetch() { + assert(canAccomodateInstructionFetch() && + "Should not be performing fetch if can't accomodate for it."); + + for (int NumBuffersFilled = 0; NumBuffersFilled != BuffersFilledPerCycle; + ++NumBuffersFilled) + Buffers.emplace_back(); +} + +void InstructionBuffer::cycleStart() { + // The fetches from the previous cycle (if any) have completed by now. + for (Buffer &Buf : llvm::reverse(Buffers)) { + if (Buf.FetchCompleted) + break; // All the earlier buffers already completed fetching. + Buf.NumBytesRemaining = BytesPerBuffer; + Buf.FetchCompleted = true; + } + + // And if we can accommodate it, enqueue the next fetch. + if (canAccomodateInstructionFetch()) + enqueueInstructionFetch(); + + // Finally, which buffers can the decoder actually look into this cycle?
+ BuffersAvaliableToDecoderThisCycle = Buffers; + assert(NumKnownDepletedBuffers == getNumDepletedBuffers() && + "NumKnownDepletedBuffers should not be outdated yet."); + BuffersAvaliableToDecoderThisCycle = + BuffersAvaliableToDecoderThisCycle.drop_front(NumKnownDepletedBuffers) + .take_front(NumBuffersDecoderCanAccessPerCycle) + .take_while([](const Buffer &Buf) { return Buf.isReady(); }); + +#ifndef NDEBUG + assert(BuffersAvaliableToDecoderThisCycle.size() <= + NumBuffersDecoderCanAccessPerCycle && + "Predicate error?"); + llvm::for_each(BuffersAvaliableToDecoderThisCycle, [](const Buffer &Buf) { + assert(Buf.isReady() && !Buf.isDepleted() && + "Should have only selected buffers that finished fetching and " + "weren't depleted previously."); + }); +#endif + + // The number of eligible buffers must match decoder's expectations exactly. + if (BuffersAvaliableToDecoderThisCycle.size() != + NumBuffersDecoderCanAccessPerCycle) + BuffersAvaliableToDecoderThisCycle = + decltype(BuffersAvaliableToDecoderThisCycle)(); + + BytesRemaining = + std::accumulate(BuffersAvaliableToDecoderThisCycle.begin(), + BuffersAvaliableToDecoderThisCycle.end(), unsigned(0), + [](unsigned BytesRemainingSoFar, const Buffer &Buf) { + return BytesRemainingSoFar + Buf.NumBytesRemaining; + }); + BytesRemaining = std::min(BytesRemaining, NumBytesDecoderCanAccessPerCycle); + FetchesPerformed = 0; +} + +unsigned InstructionBuffer::getNumBytesRemaining() const { + return BytesRemaining; +} + +void InstructionBuffer::consumeNumBytes(unsigned NumBytesToConsume) { + assert(NumBytesToConsume <= getNumBytesRemaining() && + "Can't consume more bytes than avaliable."); + + BytesRemaining -= NumBytesToConsume; + for (Buffer &Buf : BuffersAvaliableToDecoderThisCycle) { + unsigned NumBytesCanConsumeFromThisBuffer = + std::min((unsigned)Buf.NumBytesRemaining, NumBytesToConsume); + Buf.NumBytesRemaining -= NumBytesCanConsumeFromThisBuffer; + NumBytesToConsume -= NumBytesCanConsumeFromThisBuffer; + } + 
assert(NumBytesToConsume == 0 && "Consumption failure"); + + ++FetchesPerformed; + if (FetchesPerformed == MaxFetchesPerCycle) + BytesRemaining = 0; // ratelimit +} + +void InstructionBuffer::cycleEnd() { + recalculateNumDepletedBuffers(); + // If at least half of the buffers we're tracking are depleted, GC them. + if ((NumKnownDepletedBuffers * 2) >= Buffers.size()) { + // Erase buffers up to the first that hasn't been depleted. + Buffers.erase(Buffers.begin(), Buffers.begin() + NumKnownDepletedBuffers); + NumKnownDepletedBuffers = 0; + } +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/MC/MCInst.h" +#include "llvm/MCA/CodeEmitter.h" #include "llvm/Support/Debug.h" #include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" @@ -498,7 +499,8 @@ } Expected -InstrBuilder::createInstrDescImpl(const MCInst &MCI) { +InstrBuilder::createInstrDescImpl(CodeEmitter &CE, unsigned MCID, + const MCInst &MCI) { assert(STI.getSchedModel().hasInstrSchedModel() && "Itineraries are not yet supported!"); @@ -536,6 +538,9 @@ // Create a new empty descriptor. 
std::unique_ptr ID = std::make_unique(); + ID->EncodingByteLength = CE.getEncoding(MCID).size(); + assert(ID->EncodingByteLength != 0 && + "Instruction with zero-byte-lenght encoding?"); ID->NumMicroOps = SCDesc.NumMicroOps; ID->SchedClassID = SchedClassID; @@ -588,19 +593,21 @@ } Expected -InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) { +InstrBuilder::getOrCreateInstrDesc(CodeEmitter &CE, unsigned MCID, + const MCInst &MCI) { if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end()) return *Descriptors[MCI.getOpcode()]; if (VariantDescriptors.find(&MCI) != VariantDescriptors.end()) return *VariantDescriptors[&MCI]; - return createInstrDescImpl(MCI); + return createInstrDescImpl(CE, MCID, MCI); } Expected> -InstrBuilder::createInstruction(const MCInst &MCI) { - Expected DescOrErr = getOrCreateInstrDesc(MCI); +InstrBuilder::createInstruction(CodeEmitter &CE, unsigned MCID) { + const MCInst &MCI = CE.getInst(MCID); + Expected DescOrErr = getOrCreateInstrDesc(CE, MCID, MCI); if (!DescOrErr) return DescOrErr.takeError(); const InstrDesc &D = *DescOrErr; diff --git a/llvm/lib/MCA/Pipeline.cpp b/llvm/lib/MCA/Pipeline.cpp --- a/llvm/lib/MCA/Pipeline.cpp +++ b/llvm/lib/MCA/Pipeline.cpp @@ -57,7 +57,7 @@ } // Now fetch and execute new instructions. - InstRef IR; + InstRef IR; // placeholder, not actually used. Stage &FirstStage = *Stages[0]; while (!Err && FirstStage.isAvailable(IR)) Err = FirstStage.execute(IR); diff --git a/llvm/lib/MCA/Stages/DecodeStage.cpp b/llvm/lib/MCA/Stages/DecodeStage.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/MCA/Stages/DecodeStage.cpp @@ -0,0 +1,224 @@ +//===---------------------- DecodeStage.cpp ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the DecodeStage. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/DecodeStage.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/MCA/HardwareUnits/InstructionBuffer.h" +#include "llvm/MCA/SourceMgr.h" +#include +#include +#include +#include +#include + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +DecodeStage::DecodeStage(InstructionBuffer &Buffer_, SourceMgr &SM_) + : Buffer(Buffer_), SM(SM_), NumRetired(0) {} + +void DecodeStage::getNextInstructionFromSourceManager() { + assert(!CurrentInstruction && "There is already an instruction to process!"); + if (!SM.hasNext()) + return; + SourceRef SR = SM.peekNext(); + std::unique_ptr Inst = std::make_unique(SR.second); + CurrentInstruction = InstRef(SR.first, Inst.get()); + Instructions.emplace_back(std::move(Inst)); + SM.updateNext(); +} + +llvm::Error DecodeStage::cycleStart() { + if (!CurrentInstruction) + getNextInstructionFromSourceManager(); + return llvm::ErrorSuccess(); +} + +bool DecodeStage::microOpDecodersHaveWorkToComplete() const { + return MicroOpEngine.IR || std::any_of(Decoders.begin(), Decoders.end(), + [](const InstRef &IR) { return IR; }); +} + +bool DecodeStage::hasWorkToComplete() const { + return static_cast(CurrentInstruction) || + microOpDecodersHaveWorkToComplete(); +} + +bool DecodeStage::IsMicroCoded(const InstRef &IR) { + // FIXME: parametrize. 
+ return IR.getInstruction()->getDesc().NumMicroOps > 2; +} + +InstRef DecodeStage::peekNextInstructionFromBuffer() const { + if (!CurrentInstruction) + return InstRef(); + + if (Buffer.getNumBytesRemaining() < + CurrentInstruction.getInstruction()->getEncodingByteLength()) + return InstRef(); + + return CurrentInstruction; +} + +bool DecodeStage::canEnqueueForMicroOpDecoding(const InstRef &IR) const { + // If we are currently decoding microcoded instruction, + // we can't start decoding *anything* else. + if (MicroOpEngine.IR) + return false; + + if (IsMicroCoded(IR)) { + // We can not start decoding microcoded instruction until + // we finish decoding *all* preceding instructions. + return !microOpDecodersHaveWorkToComplete(); + } + + // There are only 4 decoders. + // FIXME: parametrize. + if (std::count_if(Decoders.begin(), Decoders.end(), + [](const InstRef &IR) { return IR; }) >= 4) + return false; + + // FIXME: parametrize. It can be more complex than that. + unsigned WouldBeNumMicroOpsTotal = + std::accumulate(Decoders.begin(), Decoders.end(), + IR.getInstruction()->getDesc().NumMicroOps, + [](unsigned NumMicroOpsSoFar, const InstRef &IR) { + if (const Instruction *Instr = IR.getInstruction()) + NumMicroOpsSoFar += Instr->getDesc().NumMicroOps; + return NumMicroOpsSoFar; + }); + // We can at most generate 4 microops per cycle. + // That is, we can generate 2-2/2-1-1/1-1-1-1. 
+ return WouldBeNumMicroOpsTotal <= 4; +} + +bool DecodeStage::isAvailable(const InstRef & /*unused*/) const { + InstRef IR = peekNextInstructionFromBuffer(); + if (!IR) + return false; + + return canEnqueueForMicroOpDecoding(IR); +}; + +InstRef DecodeStage::getNextInstructionFromBuffer() { + InstRef IR = peekNextInstructionFromBuffer(); + assert(IR && "No next instruction?"); + Buffer.consumeNumBytes(IR.getInstruction()->getEncodingByteLength()); + return IR; +} + +Error DecodeStage::execute(InstRef & /*unused*/) { + assert(isAvailable(InstRef()) && + "Should not start decoding instruction unless we are ready to."); + + InstRef IR = getNextInstructionFromBuffer(); + + // Move the program counter. + CurrentInstruction.invalidate(); + getNextInstructionFromSourceManager(); + + if (IsMicroCoded(IR)) { + assert(Decoders.empty() && !MicroOpEngine.IR && + "Must not start decoding microcoded instruction if the decoder is " + "already occupied."); + + MicroOpEngine.IR = IR; + MicroOpEngine.MicroOpsLeftToGenerate = + IR.getInstruction()->getDesc().NumMicroOps; + return llvm::ErrorSuccess(); + } + + assert(!MicroOpEngine.IR && + "Must not start decoding non-microcoded instruction if already " + "decoding microcoded instruction."); + Decoders.emplace_back(IR); + return llvm::ErrorSuccess(); +} + +Error DecodeStage::performMicroOpDecoding() { + if (!microOpDecodersHaveWorkToComplete()) + return llvm::ErrorSuccess(); + + if (InstRef &IR = MicroOpEngine.IR) { + assert(Decoders.empty() && + "Microcoded instruction must be decoded standalone"); + + // Is the next stage ready to receive all the microcodes? + if (!checkNextStage(IR)) + return llvm::ErrorSuccess(); // Stall. + + // Okay, start/continue generating microops. + + // FIXME: is that so for BdVer2? + // FIXME: parametrize. + MicroOpEngine.MicroOpsLeftToGenerate -= 2; + // Did we just finish generating microops for this Microcoded instruction?
+ if (MicroOpEngine.MicroOpsLeftToGenerate > 0) + return llvm::ErrorSuccess(); // More microops left to generate... + + // Done decoding/generating. + if (llvm::Error Val = moveToTheNextStage(IR)) + return Val; + IR.invalidate(); + return llvm::ErrorSuccess(); + } + + // Okay, must be a normal instruction. + assert(!Decoders.empty() && "Should be decoding some plain instructions."); + for (InstRef &IR : Decoders) { + if (!IR) + continue; + + // Is the next stage ready to receive microops of this decoded instruction? + if (!checkNextStage(IR)) + break; // Stall. + // Done decoding/generating in a single cycle. + if (llvm::Error Val = moveToTheNextStage(IR)) + return Val; + IR.invalidate(); + } + + return llvm::ErrorSuccess(); +} + +Error DecodeStage::cycleEnd() { + if (Error E = performMicroOpDecoding()) + return E; + + // Find the first instruction which hasn't been fully decoded. + auto DIt = find_if(Decoders, [](const InstRef &IR) { return IR; }); + unsigned NumDecoded = std::distance(Decoders.begin(), DIt); + // Erase instructions up to the first that hasn't been decoded. + if ((NumDecoded * 2) >= Decoders.size()) + Decoders.erase(Decoders.begin(), DIt); + + // Find the first instruction which hasn't been retired. + auto RRange = make_range(&Instructions[NumRetired], Instructions.end()); + auto RIt = find_if(RRange, [](const std::unique_ptr &I) { + return !I->isRetired(); + }); + NumRetired = std::distance(Instructions.begin(), RIt); + // Erase instructions up to the first that hasn't been retired.
+ if ((NumRetired * 2) >= Instructions.size()) { + Instructions.erase(Instructions.begin(), RIt); + NumRetired = 0; + } + + return llvm::ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/lib/MCA/Stages/EntryStage.cpp b/llvm/lib/MCA/Stages/EntryStage.cpp --- a/llvm/lib/MCA/Stages/EntryStage.cpp +++ b/llvm/lib/MCA/Stages/EntryStage.cpp @@ -39,7 +39,7 @@ SM.updateNext(); } -llvm::Error EntryStage::execute(InstRef & /*unused */) { +llvm::Error EntryStage::execute(InstRef & /*unused*/) { assert(CurrentInstruction && "There is no instruction to process!"); if (llvm::Error Val = moveToTheNextStage(CurrentInstruction)) return Val; diff --git a/llvm/lib/MCA/Stages/FetchStage.cpp b/llvm/lib/MCA/Stages/FetchStage.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/MCA/Stages/FetchStage.cpp @@ -0,0 +1,60 @@ +//===--------------------- FetchStage.cpp -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file defines the FetchStage. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/MCA/Stages/FetchStage.h" +#include "llvm/MCA/HardwareUnits/InstructionBuffer.h" +#include "llvm/MCA/Instruction.h" +#include + +namespace llvm { +namespace mca { + +#define DEBUG_TYPE "llvm-mca" + +FetchStage::FetchStage(InstructionBuffer &Buffer_) : Buffer(Buffer_) {} + +Error FetchStage::cycleStart() { + Buffer.cycleStart(); + return llvm::ErrorSuccess(); +} + +bool FetchStage::hasWorkToComplete() const { + // This stage's sole purpose is keeping the Instruction Byte Buffer filled, + // so we effectively never run out of work, let other stages answer instead. 
+ return false; +} + +bool FetchStage::isAvailable(const InstRef & /*unused*/) const { + // Just passthrough the question to the next stage, is it ready? + return checkNextStage(InstRef()); +}; + +Error FetchStage::execute(InstRef & /*unused*/) { + assert(isAvailable(InstRef()) && + "Should not be executing if we don't have resources to do so."); + + // Just passthrough to the next stage. + InstRef IR; // placeholder, not actually used. + if (llvm::Error Val = moveToTheNextStage(IR)) + return Val; + + return llvm::ErrorSuccess(); +} + +Error FetchStage::cycleEnd() { + Buffer.cycleEnd(); + return llvm::ErrorSuccess(); +} + +} // namespace mca +} // namespace llvm diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s b/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/add-sequence.s @@ -7,7 +7,7 @@ # CHECK: Iterations: 1000 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 3004 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 @@ -64,39 +64,39 @@ # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - addl %eax, %edx # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01 +# CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeER . . . . . .. addl %eax, %ecx -# CHECK-NEXT: [0,1] DeER . . . . . .. addl %esi, %eax -# CHECK-NEXT: [0,2] D==eER . . . . .. addl %eax, %edx -# CHECK-NEXT: [1,0] D====eER . . . . .. addl %eax, %ecx -# CHECK-NEXT: [1,1] .D=eE--R . . . . .. addl %esi, %eax -# CHECK-NEXT: [1,2] .D===eER . . . . .. addl %eax, %edx -# CHECK-NEXT: [2,0] .D=====eER. . . . .. addl %eax, %ecx -# CHECK-NEXT: [2,1] .D=====eER. . . . .. addl %esi, %eax -# CHECK-NEXT: [2,2] . D======eER . . . .. addl %eax, %edx -# CHECK-NEXT: [3,0] . D========eER . . . .. addl %eax, %ecx -# CHECK-NEXT: [3,1] . D======eE--R . . . .. addl %esi, %eax -# CHECK-NEXT: [3,2] . D========eER . . . 
.. addl %eax, %edx -# CHECK-NEXT: [4,0] . D=========eER . . .. addl %eax, %ecx -# CHECK-NEXT: [4,1] . D=========eER . . .. addl %esi, %eax -# CHECK-NEXT: [4,2] . D===========eER . . .. addl %eax, %edx -# CHECK-NEXT: [5,0] . D=============eER. . .. addl %eax, %ecx -# CHECK-NEXT: [5,1] . D==========eE--R. . .. addl %esi, %eax -# CHECK-NEXT: [5,2] . D============eER. . .. addl %eax, %edx -# CHECK-NEXT: [6,0] . D==============eER . .. addl %eax, %ecx -# CHECK-NEXT: [6,1] . D==============eER . .. addl %esi, %eax -# CHECK-NEXT: [6,2] . D===============eER . .. addl %eax, %edx -# CHECK-NEXT: [7,0] . D=================eER .. addl %eax, %ecx -# CHECK-NEXT: [7,1] . D===============eE--R .. addl %esi, %eax -# CHECK-NEXT: [7,2] . D=================eER .. addl %eax, %edx -# CHECK-NEXT: [8,0] . .D==================eER .. addl %eax, %ecx -# CHECK-NEXT: [8,1] . .D==================eER .. addl %esi, %eax -# CHECK-NEXT: [8,2] . .D====================eER.. addl %eax, %edx -# CHECK-NEXT: [9,0] . .D======================eER addl %eax, %ecx -# CHECK-NEXT: [9,1] . . D===================eE--R addl %esi, %eax -# CHECK-NEXT: [9,2] . . D=====================eER addl %eax, %edx +# CHECK: [0,0] .DeER. . . . . . . addl %eax, %ecx +# CHECK-NEXT: [0,1] .DeER. . . . . . . addl %esi, %eax +# CHECK-NEXT: [0,2] .D==eER . . . . . . addl %eax, %edx +# CHECK-NEXT: [1,0] .D====eER . . . . . . addl %eax, %ecx +# CHECK-NEXT: [1,1] . D=eE--R . . . . . . addl %esi, %eax +# CHECK-NEXT: [1,2] . D===eER . . . . . . addl %eax, %edx +# CHECK-NEXT: [2,0] . D=====eER . . . . . addl %eax, %ecx +# CHECK-NEXT: [2,1] . D=====eER . . . . . addl %esi, %eax +# CHECK-NEXT: [2,2] . D======eER . . . . . addl %eax, %edx +# CHECK-NEXT: [3,0] . D========eER. . . . . addl %eax, %ecx +# CHECK-NEXT: [3,1] . D======eE--R. . . . . addl %esi, %eax +# CHECK-NEXT: [3,2] . D========eER. . . . . addl %eax, %edx +# CHECK-NEXT: [4,0] . D=========eER . . . . addl %eax, %ecx +# CHECK-NEXT: [4,1] . D=========eER . . . . 
addl %esi, %eax +# CHECK-NEXT: [4,2] . D===========eER . . . . addl %eax, %edx +# CHECK-NEXT: [5,0] . D=============eER . . . addl %eax, %ecx +# CHECK-NEXT: [5,1] . D==========eE--R . . . addl %esi, %eax +# CHECK-NEXT: [5,2] . D============eER . . . addl %eax, %edx +# CHECK-NEXT: [6,0] . D==============eER . . . addl %eax, %ecx +# CHECK-NEXT: [6,1] . D==============eER . . . addl %esi, %eax +# CHECK-NEXT: [6,2] . .D===============eER. . . addl %eax, %edx +# CHECK-NEXT: [7,0] . .D=================eER . . addl %eax, %ecx +# CHECK-NEXT: [7,1] . .D===============eE--R . . addl %esi, %eax +# CHECK-NEXT: [7,2] . .D=================eER . . addl %eax, %edx +# CHECK-NEXT: [8,0] . . D==================eER . . addl %eax, %ecx +# CHECK-NEXT: [8,1] . . D==================eER . . addl %esi, %eax +# CHECK-NEXT: [8,2] . . D====================eER . addl %eax, %edx +# CHECK-NEXT: [9,0] . . D======================eER addl %eax, %ecx +# CHECK-NEXT: [9,1] . . D===================eE--R addl %esi, %eax +# CHECK-NEXT: [9,2] . . D=====================eER addl %eax, %edx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-1.s @@ -15,7 +15,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 900 +# CHECK-NEXT: Total Cycles: 901 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 @@ -38,17 +38,17 @@ # CHECK-NEXT: 6 3 3.00 bsfq %rax, %rcx # CHECK: Timeline view: -# CHECK-NEXT: 01234567 +# CHECK-NEXT: 012345678 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . . . imulq $5, %rcx, %rax -# CHECK-NEXT: [0,1] DeeE----R . . . lzcntl %ecx, %eax -# CHECK-NEXT: [0,2] D==eE---R . . . andq %rcx, %rax -# CHECK-NEXT: [0,3] .D==eeeER . . . 
bsfq %rax, %rcx -# CHECK-NEXT: [1,0] . D====eeeeeeER. . imulq $5, %rcx, %rax -# CHECK-NEXT: [1,1] . D======eeE-R. . lzcntl %ecx, %eax -# CHECK-NEXT: [1,2] . D========eER. . andq %rcx, %rax -# CHECK-NEXT: [1,3] . D========eeeER bsfq %rax, %rcx +# CHECK: [0,0] .DeeeeeeER. . . imulq $5, %rcx, %rax +# CHECK-NEXT: [0,1] .DeeE----R. . . lzcntl %ecx, %eax +# CHECK-NEXT: [0,2] .D==eE---R. . . andq %rcx, %rax +# CHECK-NEXT: [0,3] . DeeeER. . . bsfq %rax, %rcx +# CHECK-NEXT: [1,0] . D==eeeeeeER . imulq $5, %rcx, %rax +# CHECK-NEXT: [1,1] . .D====eeE-R . lzcntl %ecx, %eax +# CHECK-NEXT: [1,2] . .D======eER . andq %rcx, %rax +# CHECK-NEXT: [1,3] . . D====eeeER bsfq %rax, %rcx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -57,8 +57,8 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 3.0 0.5 0.0 imulq $5, %rcx, %rax -# CHECK-NEXT: 1. 2 4.0 2.0 2.5 lzcntl %ecx, %eax -# CHECK-NEXT: 2. 2 6.0 0.0 1.5 andq %rcx, %rax -# CHECK-NEXT: 3. 2 6.0 0.0 0.0 bsfq %rax, %rcx -# CHECK-NEXT: 2 4.8 0.6 1.0 +# CHECK-NEXT: 0. 2 2.0 0.5 0.0 imulq $5, %rcx, %rax +# CHECK-NEXT: 1. 2 3.0 2.0 2.5 lzcntl %ecx, %eax +# CHECK-NEXT: 2. 2 5.0 0.0 1.5 andq %rcx, %rax +# CHECK-NEXT: 3. 
2 3.0 0.0 0.0 bsfq %rax, %rcx +# CHECK-NEXT: 2 3.3 0.6 1.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-2.s @@ -33,7 +33,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1800 -# CHECK-NEXT: Total Cycles: 3203 +# CHECK-NEXT: Total Cycles: 3204 # CHECK-NEXT: Total uOps: 3400 # CHECK: Dispatch Width: 4 @@ -71,44 +71,44 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 01234567 -# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . . . .. vdivps %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: [0,1] DeeeeeE----R . . . . . . . . . . .. vaddps %xmm0, %xmm1, %xmm3 -# CHECK-NEXT: [0,2] .D====eeeeeER . . . . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,3] .D======eeeeeER. . . . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,4] . D=======eeeeeER . . . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,5] . D=========eeeeeER . . . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,6] . D==========eeeeeER . . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,7] . D============eeeeeER . . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,8] . D=============eeeeeER. . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,9] . D==============eeeeeER . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,10] . D==============eeeeeER . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,11] . D===============eeeeeER . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,12] . .D===============eeeeeER . . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,13] . .D================eeeeeER. . . . . . . .. 
vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,14] . . D================eeeeeER . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,15] . . D=================eeeeeER . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,16] . . D=================eeeeeER . . . . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [0,17] . . D======================eeER. . . . . . .. vandps %xmm4, %xmm1, %xmm0 -# CHECK-NEXT: [1,0] . . D=======================eeeeeeeeeER . . . . .. vdivps %ymm0, %ymm1, %ymm3 -# CHECK-NEXT: [1,1] . . D=======================eeeeeE----R . . . . .. vaddps %xmm0, %xmm1, %xmm3 -# CHECK-NEXT: [1,2] . . D===========================eeeeeER. . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,3] . . D=============================eeeeeER . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,4] . . .D==============================eeeeeER . . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,5] . . .D================================eeeeeER . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,6] . . . D=================================eeeeeER . . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,7] . . . D===================================eeeeeER. . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,8] . . . D====================================eeeeeER . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,9] . . . D=====================================eeeeeER . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,10] . . . D=====================================eeeeeER . .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,11] . . . D======================================eeeeeER. .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,12] . . . D======================================eeeeeER .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,13] . . . D=======================================eeeeeER .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,14] . . . .D=======================================eeeeeER .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,15] . . . 
.D========================================eeeeeER .. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,16] . . . . D========================================eeeeeER.. vaddps %ymm3, %ymm1, %ymm4 -# CHECK-NEXT: [1,17] . . . . D=============================================eeER vandps %xmm4, %xmm1, %xmm0 +# CHECK: [0,0] .DeeeeeeeeeER . . . . . . . . . . . . vdivps %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: [0,1] .DeeeeeE----R . . . . . . . . . . . . vaddps %xmm0, %xmm1, %xmm3 +# CHECK-NEXT: [0,2] . D====eeeeeER . . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,3] . D======eeeeeER . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,4] . D=======eeeeeER . . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,5] . D=========eeeeeER. . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,6] . D==========eeeeeER . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,7] . D============eeeeeER . . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,8] . D=============eeeeeER . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,9] . D==============eeeeeER . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,10] . .D==============eeeeeER . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,11] . .D===============eeeeeER . . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,12] . . D===============eeeeeER. . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,13] . . D================eeeeeER . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,14] . . D================eeeeeER . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,15] . . D=================eeeeeER . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,16] . . D=================eeeeeER . . . . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [0,17] . . D======================eeER . . . . . . . vandps %xmm4, %xmm1, %xmm0 +# CHECK-NEXT: [1,0] . . D=======================eeeeeeeeeER. . . . . 
. vdivps %ymm0, %ymm1, %ymm3 +# CHECK-NEXT: [1,1] . . D=======================eeeeeE----R. . . . . . vaddps %xmm0, %xmm1, %xmm3 +# CHECK-NEXT: [1,2] . . .D===========================eeeeeER . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,3] . . .D=============================eeeeeER . . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,4] . . . D==============================eeeeeER. . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,5] . . . D================================eeeeeER . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,6] . . . D=================================eeeeeER . . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,7] . . . D===================================eeeeeER . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,8] . . . D====================================eeeeeER . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,9] . . . D=====================================eeeeeER . . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,10] . . . D=====================================eeeeeER. . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,11] . . . D======================================eeeeeER . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,12] . . . .D======================================eeeeeER . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,13] . . . .D=======================================eeeeeER . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,14] . . . . D=======================================eeeeeER . . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,15] . . . . D========================================eeeeeER. . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,16] . . . . D========================================eeeeeER . vaddps %ymm3, %ymm1, %ymm4 +# CHECK-NEXT: [1,17] . . . . 
D=============================================eeER vandps %xmm4, %xmm1, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/clear-super-register-3.s @@ -21,7 +21,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 300 -# CHECK-NEXT: Total Cycles: 655 +# CHECK-NEXT: Total Cycles: 656 # CHECK-NEXT: Total uOps: 300 # CHECK: Dispatch Width: 4 @@ -44,14 +44,14 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: Index 0123456789 0123 -# CHECK: [0,0] D=eeeeeeeeeER . . . sqrtss %xmm0, %xmm0 -# CHECK-NEXT: [0,1] DeeeeeE-----R . . . movss (%eax), %xmm0 -# CHECK-NEXT: [0,2] D======eeeeeER . . . addps %xmm0, %xmm0 -# CHECK-NEXT: [1,0] D===========eeeeeeeeeER sqrtss %xmm0, %xmm0 -# CHECK-NEXT: [1,1] .D==eeeeeE------------R movss (%eax), %xmm0 -# CHECK-NEXT: [1,2] .D=========eeeeeE-----R addps %xmm0, %xmm0 +# CHECK: [0,0] .D=eeeeeeeeeER . . . sqrtss %xmm0, %xmm0 +# CHECK-NEXT: [0,1] .DeeeeeE-----R . . . movss (%eax), %xmm0 +# CHECK-NEXT: [0,2] .D======eeeeeER. . . addps %xmm0, %xmm0 +# CHECK-NEXT: [1,0] .D===========eeeeeeeeeER sqrtss %xmm0, %xmm0 +# CHECK-NEXT: [1,1] . D==eeeeeE------------R movss (%eax), %xmm0 +# CHECK-NEXT: [1,2] . D=========eeeeeE-----R addps %xmm0, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -69,7 +69,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 300 -# CHECK-NEXT: Total Cycles: 655 +# CHECK-NEXT: Total Cycles: 656 # CHECK-NEXT: Total uOps: 300 # CHECK: Dispatch Width: 4 @@ -92,14 +92,14 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012 +# CHECK-NEXT: Index 0123456789 0123 -# CHECK: [0,0] D=eeeeeeeeeER . . . 
sqrtsd %xmm0, %xmm0 -# CHECK-NEXT: [0,1] DeeeeeE-----R . . . movsd (%eax), %xmm0 -# CHECK-NEXT: [0,2] D======eeeeeER . . . addps %xmm0, %xmm0 -# CHECK-NEXT: [1,0] D===========eeeeeeeeeER sqrtsd %xmm0, %xmm0 -# CHECK-NEXT: [1,1] .D==eeeeeE------------R movsd (%eax), %xmm0 -# CHECK-NEXT: [1,2] .D=========eeeeeE-----R addps %xmm0, %xmm0 +# CHECK: [0,0] .D=eeeeeeeeeER . . . sqrtsd %xmm0, %xmm0 +# CHECK-NEXT: [0,1] .DeeeeeE-----R . . . movsd (%eax), %xmm0 +# CHECK-NEXT: [0,2] .D======eeeeeER. . . addps %xmm0, %xmm0 +# CHECK-NEXT: [1,0] .D===========eeeeeeeeeER sqrtsd %xmm0, %xmm0 +# CHECK-NEXT: [1,1] . D==eeeeeE------------R movsd (%eax), %xmm0 +# CHECK-NEXT: [1,2] . D=========eeeeeE-----R addps %xmm0, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-cmp.s @@ -11,7 +11,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 2253 +# CHECK-NEXT: Total Cycles: 2254 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 @@ -66,14 +66,14 @@ # CHECK-NEXT: - - - - - 0.50 0.50 - - - - - - - - - - - - - - - - cmovael %ebx, %eax # CHECK: Timeline view: -# CHECK-NEXT: Index 01234567 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . cmpl %eax, %eax -# CHECK-NEXT: [0,1] D==eER . cmovael %ebx, %eax -# CHECK-NEXT: [1,0] DeE--R . cmpl %eax, %eax -# CHECK-NEXT: [1,1] D===eER. cmovael %ebx, %eax -# CHECK-NEXT: [2,0] .D=eE-R. cmpl %eax, %eax -# CHECK-NEXT: [2,1] .D===eER cmovael %ebx, %eax +# CHECK: [0,0] .DeER. . cmpl %eax, %eax +# CHECK-NEXT: [0,1] .D==eER . cmovael %ebx, %eax +# CHECK-NEXT: [1,0] .DeE--R . cmpl %eax, %eax +# CHECK-NEXT: [1,1] .D===eER. cmovael %ebx, %eax +# CHECK-NEXT: [2,0] . D=eE-R. 
cmpl %eax, %eax +# CHECK-NEXT: [2,1] . D===eER cmovael %ebx, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpeq.s @@ -14,7 +14,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 6000 -# CHECK-NEXT: Total Cycles: 6003 +# CHECK-NEXT: Total Cycles: 6004 # CHECK-NEXT: Total uOps: 6000 # CHECK: Dispatch Width: 4 @@ -73,21 +73,21 @@ # CHECK-NEXT: - - - - - - - - - - 2.00 - - - 1.00 - - - - - - - - vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] D==eeER . . vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] DeeE--R . . vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,3] D==eeER . . vpcmpeqq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [1,0] .D===eeER . . vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [1,1] .D=====eeER . vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] .D===eeE--R . vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,3] .D=====eeER . vpcmpeqq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [2,0] . D======eeER . vpcmpeqb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [2,1] . D========eeER vpcmpeqw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [2,2] . D======eeE--R vpcmpeqd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,3] . D========eeER vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK: [0,0] .DeeER . . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] .D==eeER . . vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] .DeeE--R . . vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,3] .D==eeER . . vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [1,0] . D===eeER. . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] . D=====eeER . 
vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D===eeE--R . vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,3] . D=====eeER . vpcmpeqq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [2,0] . D======eeER . vpcmpeqb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . D========eeER vpcmpeqw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,2] . D======eeE--R vpcmpeqd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,3] . D========eeER vpcmpeqq %xmm3, %xmm3, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-pcmpgt.s @@ -15,7 +15,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 6000 -# CHECK-NEXT: Total Cycles: 1504 +# CHECK-NEXT: Total Cycles: 1505 # CHECK-NEXT: Total uOps: 6000 # CHECK: Dispatch Width: 4 @@ -74,20 +74,20 @@ # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456 +# CHECK-NEXT: Index 01234567 -# CHECK: [0,0] DR .. vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] DR .. vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] DR .. vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,3] DeeER.. vpcmpgtq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [1,0] .D--R.. vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [1,1] .D--R.. vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] .D--R.. vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,3] .DeeER. vpcmpgtq %xmm3, %xmm3, %xmm0 -# CHECK-NEXT: [2,0] . D--R. vpcmpgtb %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [2,1] . D--R. vpcmpgtw %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [2,2] . D--R. vpcmpgtd %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,3] . DeeER vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK: [0,0] .DR . . vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] .DR . . 
vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] .DR . . vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,3] .DeeER . vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [1,0] . D--R . vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] . D--R . vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D--R . vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,3] . DeeER. vpcmpgtq %xmm3, %xmm3, %xmm0 +# CHECK-NEXT: [2,0] . D--R. vpcmpgtb %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . D--R. vpcmpgtw %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,2] . D--R. vpcmpgtd %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,3] . DeeER vpcmpgtq %xmm3, %xmm3, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-1.s @@ -12,7 +12,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 3000 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 3004 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 @@ -67,14 +67,14 @@ # CHECK-NEXT: - - - - - 2.00 - - - - - - - - - - - - - - - - - sbbl %eax, %eax # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . sbbl %edx, %edx -# CHECK-NEXT: [0,1] D=eER. . sbbl %eax, %eax -# CHECK-NEXT: [1,0] D==eER . sbbl %edx, %edx -# CHECK-NEXT: [1,1] D===eER . sbbl %eax, %eax -# CHECK-NEXT: [2,0] .D===eER. sbbl %edx, %edx -# CHECK-NEXT: [2,1] .D====eER sbbl %eax, %eax +# CHECK: [0,0] .DeER. . sbbl %edx, %edx +# CHECK-NEXT: [0,1] .D=eER . sbbl %eax, %eax +# CHECK-NEXT: [1,0] .D==eER . sbbl %edx, %edx +# CHECK-NEXT: [1,1] .D===eER . sbbl %eax, %eax +# CHECK-NEXT: [2,0] . D===eER. sbbl %edx, %edx +# CHECK-NEXT: [2,1] . 
D====eER sbbl %eax, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependency-breaking-sbb-2.s @@ -13,7 +13,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 4014 +# CHECK-NEXT: Total Cycles: 4015 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 4 @@ -70,18 +70,18 @@ # CHECK-NEXT: - - - - - 1.33 0.67 - - - - - - - - - - - - - - - - sbbl %eax, %eax # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] D==eeeeER . . imull %edx, %eax -# CHECK-NEXT: [0,1] DeE-----R . . addl %edx, %edx -# CHECK-NEXT: [0,2] D===eE--R . . sbbl %eax, %eax -# CHECK-NEXT: [1,0] D=====eeeeER . imull %edx, %eax -# CHECK-NEXT: [1,1] .DeE-------R . addl %edx, %edx -# CHECK-NEXT: [1,2] .D====eE---R . sbbl %eax, %eax -# CHECK-NEXT: [2,0] .D=======eeeeER imull %edx, %eax -# CHECK-NEXT: [2,1] .D==eE--------R addl %edx, %edx -# CHECK-NEXT: [2,2] . D=====eE----R sbbl %eax, %eax +# CHECK: [0,0] .D==eeeeER. . imull %edx, %eax +# CHECK-NEXT: [0,1] .DeE-----R. . addl %edx, %edx +# CHECK-NEXT: [0,2] .D===eE--R. . sbbl %eax, %eax +# CHECK-NEXT: [1,0] .D=====eeeeER . imull %edx, %eax +# CHECK-NEXT: [1,1] . DeE-------R . addl %edx, %edx +# CHECK-NEXT: [1,2] . D====eE---R . sbbl %eax, %eax +# CHECK-NEXT: [2,0] . D=======eeeeER imull %edx, %eax +# CHECK-NEXT: [2,1] . D==eE--------R addl %edx, %edx +# CHECK-NEXT: [2,2] . 
D=====eE----R sbbl %eax, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dependent-pmuld-paddd.s @@ -7,7 +7,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 3005 +# CHECK-NEXT: Total Cycles: 3006 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 4 @@ -65,38 +65,38 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 01234 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 012345 -# CHECK: [0,0] DeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] D====eeER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [0,2] D======eeER . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [1,0] D======eeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [1,1] .D=========eeER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [1,2] .D===========eeER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [2,0] .D===========eeeeER . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [2,1] .D===============eeER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [2,2] . D================eeER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [3,0] . D================eeeeER. . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [3,1] . D====================eeER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [3,2] . D======================eeER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [4,0] . D=====================eeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [4,1] . D=========================eeER . . . . . . . 
vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [4,2] . D===========================eeER. . . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [5,0] . D===========================eeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [5,1] . D==============================eeER . . . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [5,2] . D================================eeER . . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [6,0] . D================================eeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [6,1] . D====================================eeER. . . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [6,2] . D=====================================eeER . . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [7,0] . D=====================================eeeeER . . . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [7,1] . D=========================================eeER . . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [7,2] . D===========================================eeER . . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [8,0] . .D==========================================eeeeER. . . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [8,1] . .D==============================================eeER . . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [8,2] . .D================================================eeER . . vpaddd %xmm0, %xmm0, %xmm3 -# CHECK-NEXT: [9,0] . .D================================================eeeeER . vpmuldq %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [9,1] . . D===================================================eeER . vpaddd %xmm1, %xmm1, %xmm0 -# CHECK-NEXT: [9,2] . . D=====================================================eeER vpaddd %xmm0, %xmm0, %xmm3 +# CHECK: [0,0] .DeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] .D====eeER. . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [0,2] .D======eeER . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [1,0] .D======eeeeER . . . . . . . . . . . 
vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [1,1] . D=========eeER . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [1,2] . D===========eeER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [2,0] . D===========eeeeER. . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [2,1] . D===============eeER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [2,2] . D================eeER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [3,0] . D================eeeeER . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [3,1] . D====================eeER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [3,2] . D======================eeER. . . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [4,0] . D=====================eeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [4,1] . D=========================eeER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [4,2] . D===========================eeER . . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [5,0] . D===========================eeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [5,1] . D==============================eeER. . . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [5,2] . D================================eeER . . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [6,0] . D================================eeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [6,1] . D====================================eeER . . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [6,2] . .D=====================================eeER . . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [7,0] . .D=====================================eeeeER. . . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [7,1] . .D=========================================eeER . . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [7,2] . .D===========================================eeER . . . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [8,0] . . 
D==========================================eeeeER . . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [8,1] . . D==============================================eeER . . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [8,2] . . D================================================eeER. . vpaddd %xmm0, %xmm0, %xmm3 +# CHECK-NEXT: [9,0] . . D================================================eeeeER . vpmuldq %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [9,1] . . D===================================================eeER . vpaddd %xmm1, %xmm1, %xmm0 +# CHECK-NEXT: [9,2] . . D=====================================================eeER vpaddd %xmm0, %xmm0, %xmm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s b/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/dot-product.s @@ -7,11 +7,11 @@ # CHECK: Iterations: 300 # CHECK-NEXT: Instructions: 900 -# CHECK-NEXT: Total Cycles: 1709 +# CHECK-NEXT: Total Cycles: 1683 # CHECK-NEXT: Total uOps: 2100 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.23 +# CHECK-NEXT: uOps Per Cycle: 1.25 # CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 5.5 @@ -55,27 +55,27 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] -# CHECK-NEXT: - - - - - - - - 5.50 5.50 - - - - 2.00 1.00 - - - - - - - +# CHECK-NEXT: - - - - - - - - 5.52 5.48 - - - - 2.00 1.00 - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: - - - - - - - - 0.49 0.51 - - - - - 1.00 - - - - - - - vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 2.53 2.47 - - - - 1.00 - - - - - - - - vhaddps %xmm2, %xmm2, 
%xmm3 -# CHECK-NEXT: - - - - - - - - 2.48 2.52 - - - - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - - 1.00 - - - - - - - vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 2.13 2.87 - - - - 1.00 - - - - - - - - vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: - - - - - - - - 2.88 2.12 - - - - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01234 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0 -# CHECK: [0,0] DeeeeeER . . . . . . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,1] D=====eeeeeeeeeeeER . . . . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [0,2] .D===============eeeeeeeeeeeER. . vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: [1,0] .DeeeeeE---------------------R. . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . D====eeeeeeeeeeeE----------R. . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [1,2] . D==============eeeeeeeeeeeER . vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: [2,0] . DeeeeeE--------------------R . vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [2,1] . D======eeeeeeeeeeeE-------R . vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: [2,2] . D================eeeeeeeeeeeER vhaddps %xmm3, %xmm3, %xmm4 +# CHECK: [0,0] .DeeeeeER . . . . . . . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,1] . D===eeeeeeeeeeeER. . . . . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [0,2] . D============eeeeeeeeeeeER . . vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [1,0] . .DeeeeeE-----------------R . . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] . . D===eeeeeeeeeeeE------R . . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,2] . . D============eeeeeeeeeeeER . vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: [2,0] . . .DeeeeeE-----------------R . vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [2,1] . . . D===eeeeeeeeeeeE------R . vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,2] . . . 
D============eeeeeeeeeeeER vhaddps %xmm3, %xmm3, %xmm4 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -84,7 +84,7 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 1.0 1.0 13.7 vmulps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 3 6.0 0.7 5.7 vhaddps %xmm2, %xmm2, %xmm3 -# CHECK-NEXT: 2. 3 16.0 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 -# CHECK-NEXT: 3 7.7 0.6 6.4 +# CHECK-NEXT: 0. 3 1.0 1.0 11.3 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 3 4.0 0.0 4.0 vhaddps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: 2. 3 13.0 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4 +# CHECK-NEXT: 3 6.0 0.3 5.1 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-1.s @@ -6,12 +6,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 20 +# CHECK-NEXT: Total Cycles: 22 # CHECK-NEXT: Total uOps: 5 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.25 -# CHECK-NEXT: IPC: 0.10 +# CHECK-NEXT: uOps Per Cycle: 0.23 +# CHECK-NEXT: IPC: 0.09 # CHECK-NEXT: Block RThroughput: 3.5 # CHECK: Instruction Info: @@ -28,10 +28,10 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456789 01 -# CHECK: [0,0] DeeER. . . . vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 +# CHECK: [0,0] .DeeER . . .. vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: [0,1] . 
DeeeeeeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/hadd-read-after-ld-2.s @@ -6,12 +6,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 20 +# CHECK-NEXT: Total Cycles: 25 # CHECK-NEXT: Total uOps: 11 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.55 -# CHECK-NEXT: IPC: 0.10 +# CHECK-NEXT: uOps Per Cycle: 0.44 +# CHECK-NEXT: IPC: 0.08 # CHECK-NEXT: Block RThroughput: 5.0 # CHECK: Instruction Info: @@ -28,10 +28,10 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 +# CHECK-NEXT: Index 0123456789 01234 -# CHECK: [0,0] DeeER. . . . vshufps $0, %xmm0, %xmm1, %xmm1 -# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 +# CHECK: [0,0] .DeeER . . . . vshufps $0, %xmm0, %xmm1, %xmm1 +# CHECK-NEXT: [0,1] . 
.DeeeeeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-1.s @@ -25,7 +25,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 4 @@ -83,7 +83,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 4 @@ -141,7 +141,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 4 @@ -199,7 +199,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1000 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 2000 # CHECK: Dispatch Width: 4 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-2.s @@ -29,7 +29,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 @@ -85,7 +85,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 @@ -141,7 +141,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total 
Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 @@ -197,7 +197,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 2003 +# CHECK-NEXT: Total Cycles: 2004 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 @@ -253,12 +253,12 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 512 +# CHECK-NEXT: Total Cycles: 513 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 # CHECK-NEXT: uOps Per Cycle: 1.95 -# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: @@ -309,12 +309,12 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 500 -# CHECK-NEXT: Total Cycles: 512 +# CHECK-NEXT: Total Cycles: 513 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 # CHECK-NEXT: uOps Per Cycle: 1.95 -# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/int-to-fpu-forwarding-3.s @@ -7,7 +7,7 @@ # CHECK: Iterations: 500 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 2014 +# CHECK-NEXT: Total Cycles: 2015 # CHECK-NEXT: Total uOps: 2500 # CHECK: Dispatch Width: 4 @@ -65,17 +65,17 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 012345 +# CHECK-NEXT: Index 0123456789 0123456 -# CHECK: [0,0] DeER . . . . . addl %eax, %eax -# CHECK-NEXT: [0,1] D===========eeER . . vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [0,2] .D============eeER . . vpinsrb $1, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [1,0] .DeE-------------R . . addl %eax, %eax -# CHECK-NEXT: [1,1] . D=============eeER. . vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [1,2] . 
D===============eeER . vpinsrb $1, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [2,0] . DeE---------------R . addl %eax, %eax -# CHECK-NEXT: [2,1] . D================eeER . vpinsrb $0, %eax, %xmm0, %xmm0 -# CHECK-NEXT: [2,2] . D=================eeER vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK: [0,0] .DeER. . . . .. addl %eax, %eax +# CHECK-NEXT: [0,1] .D===========eeER . .. vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [0,2] . D============eeER . .. vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [1,0] . DeE-------------R . .. addl %eax, %eax +# CHECK-NEXT: [1,1] . D=============eeER .. vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [1,2] . D===============eeER .. vpinsrb $1, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [2,0] . DeE---------------R .. addl %eax, %eax +# CHECK-NEXT: [2,1] . D================eeER.. vpinsrb $0, %eax, %xmm0, %xmm0 +# CHECK-NEXT: [2,2] . D=================eeER vpinsrb $1, %eax, %xmm0, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-alias.s @@ -12,7 +12,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 2403 +# CHECK-NEXT: Total Cycles: 2404 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 4 @@ -80,16 +80,16 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 +# CHECK-NEXT: Index 0123456789 01234567 -# CHECK: [0,0] DeeeeeER . . . .. vmovaps (%rsi), %xmm0 -# CHECK-NEXT: [0,1] D=====eER . . . .. vmovaps %xmm0, (%rdi) -# CHECK-NEXT: [0,2] D======eeeeeER . . .. vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: [0,3] D===========eER. . .. vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: [0,4] .D===========eeeeeER. .. vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: [0,5] .D================eER .. 
vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: [0,6] .D=================eeeeeER. vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: [0,7] .D======================eER vmovaps %xmm0, 48(%rdi) +# CHECK: [0,0] .DeeeeeER . . . . . vmovaps (%rsi), %xmm0 +# CHECK-NEXT: [0,1] .D=====eER. . . . . vmovaps %xmm0, (%rdi) +# CHECK-NEXT: [0,2] .D======eeeeeER. . . . vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: [0,3] .D===========eER . . . vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: [0,4] . D===========eeeeeER . . vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: [0,5] . D================eER . . vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: [0,6] . D=================eeeeeER. vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: [0,7] . D======================eER vmovaps %xmm0, 48(%rdi) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-store-throughput.s @@ -47,12 +47,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 306 +# CHECK-NEXT: Total Cycles: 307 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.31 -# CHECK-NEXT: IPC: 1.31 +# CHECK-NEXT: uOps Per Cycle: 1.30 +# CHECK-NEXT: IPC: 1.30 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -72,23 +72,23 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 
0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 172 (56.0%) +# CHECK-NEXT: 2, 86 (28.0%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 202 (65.8%) +# CHECK-NEXT: 2, 99 (32.2%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -139,12 +139,13 @@ # CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx) # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movb %spl, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl -# CHECK-NEXT: [0,2] D=eeeeeER. movb (%rdx), %sil -# CHECK-NEXT: [0,3] D======eER movb %dil, (%rbx) +# CHECK: [0,0] .DeER. . movb %spl, (%rax) +# CHECK-NEXT: [0,1] .DeeeeeER . movb (%rcx), %bpl +# CHECK-NEXT: [0,2] .D=eeeeeER. 
movb (%rdx), %sil +# CHECK-NEXT: [0,3] .D======eER movb %dil, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -163,12 +164,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 306 +# CHECK-NEXT: Total Cycles: 307 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.31 -# CHECK-NEXT: IPC: 1.31 +# CHECK-NEXT: uOps Per Cycle: 1.30 +# CHECK-NEXT: IPC: 1.30 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -188,23 +189,23 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 172 (56.0%) +# CHECK-NEXT: 2, 86 (28.0%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 202 (65.8%) +# CHECK-NEXT: 2, 99 (32.2%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -255,12 +256,13 @@ # CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx) # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movw %sp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp -# CHECK-NEXT: [0,2] D=eeeeeER. 
movw (%rdx), %si -# CHECK-NEXT: [0,3] D======eER movw %di, (%rbx) +# CHECK: [0,0] .DeER. . movw %sp, (%rax) +# CHECK-NEXT: [0,1] .DeeeeeER . movw (%rcx), %bp +# CHECK-NEXT: [0,2] .D=eeeeeER. movw (%rdx), %si +# CHECK-NEXT: [0,3] .D======eER movw %di, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -279,12 +281,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 306 +# CHECK-NEXT: Total Cycles: 307 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.31 -# CHECK-NEXT: IPC: 1.31 +# CHECK-NEXT: uOps Per Cycle: 1.30 +# CHECK-NEXT: IPC: 1.30 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -304,23 +306,23 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 172 (56.0%) +# CHECK-NEXT: 2, 86 (28.0%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 (2.0%) +# CHECK-NEXT: 1, 202 (65.8%) +# CHECK-NEXT: 2, 99 (32.2%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -371,12 +373,13 @@ # CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx) # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movl %esp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp -# CHECK-NEXT: [0,2] D=eeeeeER. movl (%rdx), %esi -# CHECK-NEXT: [0,3] D======eER movl %edi, (%rbx) +# CHECK: [0,0] .DeER. . movl %esp, (%rax) +# CHECK-NEXT: [0,1] .DeeeeeER . movl (%rcx), %ebp +# CHECK-NEXT: [0,2] .D=eeeeeER. movl (%rdx), %esi +# CHECK-NEXT: [0,3] .D======eER movl %edi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -395,12 +398,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 306 +# CHECK-NEXT: Total Cycles: 307 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.31 -# CHECK-NEXT: IPC: 1.31 +# CHECK-NEXT: uOps Per Cycle: 1.30 +# CHECK-NEXT: IPC: 1.30 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -420,23 +423,23 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (84.0%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 257 (83.7%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 34 (11.1%) -# CHECK-NEXT: 1, 172 (56.2%) -# CHECK-NEXT: 2, 86 (28.1%) +# CHECK-NEXT: 0, 35 (11.4%) +# CHECK-NEXT: 1, 172 (56.0%) +# CHECK-NEXT: 2, 86 (28.0%) # CHECK-NEXT: 4, 14 (4.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 5 (1.6%) -# CHECK-NEXT: 1, 202 (66.0%) -# CHECK-NEXT: 2, 99 (32.4%) +# CHECK-NEXT: 0, 6 
(2.0%) +# CHECK-NEXT: 1, 202 (65.8%) +# CHECK-NEXT: 2, 99 (32.2%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -487,12 +490,13 @@ # CHECK-NEXT: 0.04 0.96 - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx) # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movq %rsp, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp -# CHECK-NEXT: [0,2] D=eeeeeER. movq (%rdx), %rsi -# CHECK-NEXT: [0,3] D======eER movq %rdi, (%rbx) +# CHECK: [0,0] .DeER. . movq %rsp, (%rax) +# CHECK-NEXT: [0,1] .DeeeeeER . movq (%rcx), %rbp +# CHECK-NEXT: [0,2] .D=eeeeeER. movq (%rdx), %rsi +# CHECK-NEXT: [0,3] .D======eER movq %rdi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -511,7 +515,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 554 +# CHECK-NEXT: Total Cycles: 555 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -538,12 +542,12 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 55 (9.9%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 437 (78.9%) +# CHECK-NEXT: SQ - Store queue full: 437 (78.7%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 365 (65.9%) +# CHECK-NEXT: 0, 366 (65.9%) # CHECK-NEXT: 1, 88 (15.9%) # CHECK-NEXT: 2, 3 (0.5%) # CHECK-NEXT: 3, 86 (15.5%) @@ -551,9 +555,9 @@ # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 253 (45.7%) -# CHECK-NEXT: 1, 202 (36.5%) -# CHECK-NEXT: 2, 99 (17.9%) +# CHECK-NEXT: 0, 254 (45.8%) +# CHECK-NEXT: 1, 202 (36.4%) +# CHECK-NEXT: 2, 99 (17.8%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -604,13 +608,13 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . movd %mm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1 -# CHECK-NEXT: [0,2] D=eeeeeER . movd (%rdx), %mm2 -# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx) +# CHECK: [0,0] .DeeER .. movd %mm0, (%rax) +# CHECK-NEXT: [0,1] .DeeeeeER .. movd (%rcx), %mm1 +# CHECK-NEXT: [0,2] .D=eeeeeER.. movd (%rdx), %mm2 +# CHECK-NEXT: [0,3] .D======eeER movd %mm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -629,7 +633,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 405 +# CHECK-NEXT: Total Cycles: 406 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -654,22 +658,22 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 347 (85.7%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 347 (85.5%) # CHECK-NEXT: LQ - Load queue full: 0 # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 131 (32.3%) -# CHECK-NEXT: 1, 174 (43.0%) -# CHECK-NEXT: 2, 87 (21.5%) +# CHECK-NEXT: 0, 132 (32.5%) +# CHECK-NEXT: 1, 174 (42.9%) +# CHECK-NEXT: 2, 87 (21.4%) # CHECK-NEXT: 4, 13 (3.2%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 104 (25.7%) -# CHECK-NEXT: 1, 202 (49.9%) +# CHECK-NEXT: 0, 105 (25.9%) +# CHECK-NEXT: 1, 202 (49.8%) # CHECK-NEXT: 2, 99 (24.4%) # CHECK: Scheduler's queue usage: @@ -721,12 +725,13 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - 
- 1.00 movaps %xmm3, (%rbx) # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) -# CHECK-NEXT: [0,1] DeeeeeER . movaps (%rcx), %xmm1 -# CHECK-NEXT: [0,2] D=eeeeeER. movaps (%rdx), %xmm2 -# CHECK-NEXT: [0,3] D======eER movaps %xmm3, (%rbx) +# CHECK: [0,0] .DeER. . movaps %xmm0, (%rax) +# CHECK-NEXT: [0,1] .DeeeeeER . movaps (%rcx), %xmm1 +# CHECK-NEXT: [0,2] .D=eeeeeER. movaps (%rdx), %xmm2 +# CHECK-NEXT: [0,3] .D======eER movaps %xmm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/load-throughput.s @@ -54,12 +54,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 406 +# CHECK-NEXT: Total Cycles: 407 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.99 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 0.98 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -80,20 +80,20 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 217 (53.4%) -# CHECK-NEXT: 2, 178 (43.8%) +# CHECK-NEXT: 0, 218 (53.6%) +# CHECK-NEXT: 2, 178 (43.7%) # CHECK-NEXT: 4, 11 (2.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 206 
(50.7%) -# CHECK-NEXT: 2, 200 (49.3%) +# CHECK-NEXT: 0, 207 (50.9%) +# CHECK-NEXT: 2, 200 (49.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -144,12 +144,13 @@ # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - 2.00 - - - movb (%rbx), %dil # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . movb (%rax), %spl -# CHECK-NEXT: [0,1] DeeeeeER . movb (%rcx), %bpl -# CHECK-NEXT: [0,2] D==eeeeeER movb (%rdx), %sil -# CHECK-NEXT: [0,3] D==eeeeeER movb (%rbx), %dil +# CHECK: [0,0] .DeeeeeER . movb (%rax), %spl +# CHECK-NEXT: [0,1] .DeeeeeER . movb (%rcx), %bpl +# CHECK-NEXT: [0,2] .D==eeeeeER movb (%rdx), %sil +# CHECK-NEXT: [0,3] .D==eeeeeER movb (%rbx), %dil # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -168,12 +169,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 406 +# CHECK-NEXT: Total Cycles: 407 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.99 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 0.98 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -194,20 +195,20 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 217 (53.4%) -# CHECK-NEXT: 2, 178 (43.8%) +# CHECK-NEXT: 0, 218 (53.6%) +# CHECK-NEXT: 2, 178 (43.7%) # CHECK-NEXT: 4, 11 (2.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 206 (50.7%) -# 
CHECK-NEXT: 2, 200 (49.3%) +# CHECK-NEXT: 0, 207 (50.9%) +# CHECK-NEXT: 2, 200 (49.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -258,12 +259,13 @@ # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - 2.00 - - - movw (%rbx), %di # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . movw (%rax), %sp -# CHECK-NEXT: [0,1] DeeeeeER . movw (%rcx), %bp -# CHECK-NEXT: [0,2] D==eeeeeER movw (%rdx), %si -# CHECK-NEXT: [0,3] D==eeeeeER movw (%rbx), %di +# CHECK: [0,0] .DeeeeeER . movw (%rax), %sp +# CHECK-NEXT: [0,1] .DeeeeeER . movw (%rcx), %bp +# CHECK-NEXT: [0,2] .D==eeeeeER movw (%rdx), %si +# CHECK-NEXT: [0,3] .D==eeeeeER movw (%rbx), %di # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -282,12 +284,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 406 +# CHECK-NEXT: Total Cycles: 407 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.99 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 0.98 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -308,20 +310,20 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 217 (53.4%) -# CHECK-NEXT: 2, 178 (43.8%) +# CHECK-NEXT: 0, 218 (53.6%) +# CHECK-NEXT: 2, 178 (43.7%) # CHECK-NEXT: 4, 11 (2.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 206 (50.7%) -# CHECK-NEXT: 2, 200 (49.3%) 
+# CHECK-NEXT: 0, 207 (50.9%) +# CHECK-NEXT: 2, 200 (49.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -372,12 +374,13 @@ # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - 2.00 - - - movl (%rbx), %edi # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . movl (%rax), %esp -# CHECK-NEXT: [0,1] DeeeeeER . movl (%rcx), %ebp -# CHECK-NEXT: [0,2] D==eeeeeER movl (%rdx), %esi -# CHECK-NEXT: [0,3] D==eeeeeER movl (%rbx), %edi +# CHECK: [0,0] .DeeeeeER . movl (%rax), %esp +# CHECK-NEXT: [0,1] .DeeeeeER . movl (%rcx), %ebp +# CHECK-NEXT: [0,2] .D==eeeeeER movl (%rdx), %esi +# CHECK-NEXT: [0,3] .D==eeeeeER movl (%rbx), %edi # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -396,12 +399,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 406 +# CHECK-NEXT: Total Cycles: 407 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.99 -# CHECK-NEXT: IPC: 0.99 +# CHECK-NEXT: uOps Per Cycle: 0.98 +# CHECK-NEXT: IPC: 0.98 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -422,20 +425,20 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 354 (87.2%) +# CHECK-NEXT: LQ - Load queue full: 354 (87.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 217 (53.4%) -# CHECK-NEXT: 2, 178 (43.8%) +# CHECK-NEXT: 0, 218 (53.6%) +# CHECK-NEXT: 2, 178 (43.7%) # CHECK-NEXT: 4, 11 (2.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 206 (50.7%) -# CHECK-NEXT: 2, 200 (49.3%) +# CHECK-NEXT: 0, 
207 (50.9%) +# CHECK-NEXT: 2, 200 (49.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -486,12 +489,13 @@ # CHECK-NEXT: 2.00 - - - - - - - - - - - - - - - - - - 2.00 - - - movq (%rbx), %rdi # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . movq (%rax), %rsp -# CHECK-NEXT: [0,1] DeeeeeER . movq (%rcx), %rbp -# CHECK-NEXT: [0,2] D==eeeeeER movq (%rdx), %rsi -# CHECK-NEXT: [0,3] D==eeeeeER movq (%rbx), %rdi +# CHECK: [0,0] .DeeeeeER . movq (%rax), %rsp +# CHECK-NEXT: [0,1] .DeeeeeER . movq (%rcx), %rbp +# CHECK-NEXT: [0,2] .D==eeeeeER movq (%rdx), %rsi +# CHECK-NEXT: [0,3] .D==eeeeeER movq (%rbx), %rdi # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -510,7 +514,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 605 +# CHECK-NEXT: Total Cycles: 606 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -536,20 +540,20 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 533 (88.1%) +# CHECK-NEXT: LQ - Load queue full: 533 (88.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 416 (68.8%) +# CHECK-NEXT: 0, 417 (68.8%) # CHECK-NEXT: 2, 178 (29.4%) # CHECK-NEXT: 4, 11 (1.8%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 405 (66.9%) -# CHECK-NEXT: 2, 200 (33.1%) +# CHECK-NEXT: 0, 406 (67.0%) +# CHECK-NEXT: 2, 200 (33.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -600,13 +604,13 @@ # CHECK-NEXT: 3.00 - - - - - - - - - 3.00 - - - 1.00 - - - - 3.00 - - - movd (%rbx), %mm3 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . movd (%rax), %mm0 -# CHECK-NEXT: [0,1] DeeeeeER . movd (%rcx), %mm1 -# CHECK-NEXT: [0,2] D===eeeeeER movd (%rdx), %mm2 -# CHECK-NEXT: [0,3] D===eeeeeER movd (%rbx), %mm3 +# CHECK: [0,0] .DeeeeeER .. movd (%rax), %mm0 +# CHECK-NEXT: [0,1] .DeeeeeER .. movd (%rcx), %mm1 +# CHECK-NEXT: [0,2] .D===eeeeeER movd (%rdx), %mm2 +# CHECK-NEXT: [0,3] .D===eeeeeER movd (%rbx), %mm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -625,7 +629,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 605 +# CHECK-NEXT: Total Cycles: 606 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -651,20 +655,20 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 533 (88.1%) +# CHECK-NEXT: LQ - Load queue full: 533 (88.0%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 416 (68.8%) +# CHECK-NEXT: 0, 417 (68.8%) # CHECK-NEXT: 2, 178 (29.4%) # CHECK-NEXT: 4, 11 (1.8%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 405 (66.9%) -# CHECK-NEXT: 2, 200 (33.1%) +# CHECK-NEXT: 0, 406 (67.0%) +# CHECK-NEXT: 2, 200 (33.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -715,13 +719,13 @@ # CHECK-NEXT: 3.00 - - - - - - - 3.00 - - - - - 1.00 - - - - 3.00 - - - movaps (%rbx), %xmm3 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . movaps (%rax), %xmm0 -# CHECK-NEXT: [0,1] DeeeeeER . movaps (%rcx), %xmm1 -# CHECK-NEXT: [0,2] D===eeeeeER movaps (%rdx), %xmm2 -# CHECK-NEXT: [0,3] D===eeeeeER movaps (%rbx), %xmm3 +# CHECK: [0,0] .DeeeeeER .. movaps (%rax), %xmm0 +# CHECK-NEXT: [0,1] .DeeeeeER .. movaps (%rcx), %xmm1 +# CHECK-NEXT: [0,2] .D===eeeeeER movaps (%rdx), %xmm2 +# CHECK-NEXT: [0,3] .D===eeeeeER movaps (%rbx), %xmm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -740,7 +744,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 605 +# CHECK-NEXT: Total Cycles: 606 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 4 @@ -766,19 +770,19 @@ # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 -# CHECK-NEXT: LQ - Load queue full: 345 (57.0%) +# CHECK-NEXT: LQ - Load queue full: 345 (56.9%) # CHECK-NEXT: SQ - Store queue full: 0 # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 405 (66.9%) -# CHECK-NEXT: 4, 200 (33.1%) +# CHECK-NEXT: 0, 406 (67.0%) +# CHECK-NEXT: 4, 200 (33.0%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 405 (66.9%) -# CHECK-NEXT: 4, 200 (33.1%) +# CHECK-NEXT: 0, 406 (67.0%) +# CHECK-NEXT: 4, 200 (33.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -829,13 +833,13 @@ # CHECK-NEXT: 3.00 - - - - - - - 3.00 - - - - - 1.00 - - - - 3.00 - - - vmovaps (%rbx), %ymm3 # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . vmovaps (%rax), %ymm0 -# CHECK-NEXT: [0,1] DeeeeeER . vmovaps (%rcx), %ymm1 -# CHECK-NEXT: [0,2] .D==eeeeeER vmovaps (%rdx), %ymm2 -# CHECK-NEXT: [0,3] .D==eeeeeER vmovaps (%rbx), %ymm3 +# CHECK: [0,0] .DeeeeeER .. vmovaps (%rax), %ymm0 +# CHECK-NEXT: [0,1] .DeeeeeER .. vmovaps (%rcx), %ymm1 +# CHECK-NEXT: [0,2] . D==eeeeeER vmovaps (%rdx), %ymm2 +# CHECK-NEXT: [0,3] . D==eeeeeER vmovaps (%rbx), %ymm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s b/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/memcpy-like-test.s @@ -12,7 +12,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 806 +# CHECK-NEXT: Total Cycles: 807 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 4 @@ -79,17 +79,17 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 vmovaps %xmm0, 48(%rdi) # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . vmovaps (%rsi), %xmm0 -# CHECK-NEXT: [0,1] D======eER. . vmovaps %xmm0, (%rdi) -# CHECK-NEXT: [0,2] DeeeeeE--R. . vmovaps 16(%rsi), %xmm0 -# CHECK-NEXT: [0,3] D=======eER . vmovaps %xmm0, 16(%rdi) -# CHECK-NEXT: [0,4] .D==eeeeeER . vmovaps 32(%rsi), %xmm0 -# CHECK-NEXT: [0,5] .D========eER. vmovaps %xmm0, 32(%rdi) -# CHECK-NEXT: [0,6] .D==eeeeeE--R. vmovaps 48(%rsi), %xmm0 -# CHECK-NEXT: [0,7] .D=========eER vmovaps %xmm0, 48(%rdi) +# CHECK: [0,0] .DeeeeeER . . vmovaps (%rsi), %xmm0 +# CHECK-NEXT: [0,1] .D======eER . vmovaps %xmm0, (%rdi) +# CHECK-NEXT: [0,2] .DeeeeeE--R . 
vmovaps 16(%rsi), %xmm0 +# CHECK-NEXT: [0,3] .D=======eER . vmovaps %xmm0, 16(%rdi) +# CHECK-NEXT: [0,4] . D==eeeeeER . vmovaps 32(%rsi), %xmm0 +# CHECK-NEXT: [0,5] . D========eER. vmovaps %xmm0, 32(%rdi) +# CHECK-NEXT: [0,6] . D==eeeeeE--R. vmovaps 48(%rsi), %xmm0 +# CHECK-NEXT: [0,7] . D=========eER vmovaps %xmm0, 48(%rdi) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s b/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/one-idioms.s @@ -29,7 +29,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 1353 +# CHECK-NEXT: Total Cycles: 1354 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 4 @@ -124,24 +124,24 @@ # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 - - 0.50 0.50 - - - - - - - vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . .. pcmpeqb %mm2, %mm2 -# CHECK-NEXT: [0,1] DeeER. . .. pcmpeqd %mm2, %mm2 -# CHECK-NEXT: [0,2] D=eeER . .. pcmpeqw %mm2, %mm2 -# CHECK-NEXT: [0,3] D==eeER . .. pcmpeqb %xmm2, %xmm2 -# CHECK-NEXT: [0,4] .DeeE-R . .. pcmpeqd %xmm2, %xmm2 -# CHECK-NEXT: [0,5] .D==eeER . .. pcmpeqq %xmm2, %xmm2 -# CHECK-NEXT: [0,6] .D===eeER . .. pcmpeqw %xmm2, %xmm2 -# CHECK-NEXT: [0,7] .D=====eeER .. vpcmpeqb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,8] . D===eeE-R .. vpcmpeqd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,9] . D======eeER .. vpcmpeqq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,10] . D=====eeE-R .. vpcmpeqw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,11] . D=======eeER .. vpcmpeqb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,12] . D=======eeER.. vpcmpeqd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,13] . D========eeER. vpcmpeqq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,14] . 
D=========eeER vpcmpeqw %xmm3, %xmm3, %xmm5 +# CHECK: [0,0] .DeeER . . . pcmpeqb %mm2, %mm2 +# CHECK-NEXT: [0,1] .DeeER . . . pcmpeqd %mm2, %mm2 +# CHECK-NEXT: [0,2] .D=eeER . . . pcmpeqw %mm2, %mm2 +# CHECK-NEXT: [0,3] .D==eeER . . . pcmpeqb %xmm2, %xmm2 +# CHECK-NEXT: [0,4] . DeeE-R . . . pcmpeqd %xmm2, %xmm2 +# CHECK-NEXT: [0,5] . D==eeER . . . pcmpeqq %xmm2, %xmm2 +# CHECK-NEXT: [0,6] . D===eeER. . . pcmpeqw %xmm2, %xmm2 +# CHECK-NEXT: [0,7] . D=====eeER . . vpcmpeqb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,8] . D===eeE-R . . vpcmpeqd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,9] . D======eeER . . vpcmpeqq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,10] . D=====eeE-R . . vpcmpeqw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,11] . D=======eeER. . vpcmpeqb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,12] . D=======eeER . vpcmpeqd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,13] . D========eeER. vpcmpeqq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,14] . D=========eeER vpcmpeqw %xmm3, %xmm3, %xmm5 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-2.s @@ -7,12 +7,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 -# CHECK-NEXT: Total Cycles: 11 +# CHECK-NEXT: Total Cycles: 12 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.36 -# CHECK-NEXT: IPC: 0.27 +# CHECK-NEXT: uOps Per Cycle: 0.33 +# CHECK-NEXT: IPC: 0.25 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -29,12 +29,12 @@ # CHECK-NEXT: 1 1 1.00 addl %ecx, %ebx # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeeER . imulq %rax, %rbx -# CHECK-NEXT: [0,1] D=====eeER. 
lzcntw %ax, %bx -# CHECK-NEXT: [0,2] D=======eER addl %ecx, %ebx +# CHECK: [0,0] .DeeeeeeER.. imulq %rax, %rbx +# CHECK-NEXT: [0,1] .D=====eeER. lzcntw %ax, %bx +# CHECK-NEXT: [0,2] .D=======eER addl %ecx, %ebx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-3.s @@ -12,7 +12,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 4503 +# CHECK-NEXT: Total Cycles: 4504 # CHECK-NEXT: Total uOps: 4500 # CHECK: Dispatch Width: 4 @@ -69,18 +69,18 @@ # CHECK-NEXT: - - - - - 1.00 1.00 - - - - - - - - - - - - - - - - xorw %bx, %dx # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeER . .. addw %cx, %dx -# CHECK-NEXT: [0,1] D=eER. .. movw %ax, %dx -# CHECK-NEXT: [0,2] D==eER .. xorw %bx, %dx -# CHECK-NEXT: [1,0] D===eER .. addw %cx, %dx -# CHECK-NEXT: [1,1] .D===eER .. movw %ax, %dx -# CHECK-NEXT: [1,2] .D====eER .. xorw %bx, %dx -# CHECK-NEXT: [2,0] .D=====eER.. addw %cx, %dx -# CHECK-NEXT: [2,1] .D======eER. movw %ax, %dx -# CHECK-NEXT: [2,2] . D======eER xorw %bx, %dx +# CHECK: [0,0] .DeER. . . addw %cx, %dx +# CHECK-NEXT: [0,1] .D=eER . . movw %ax, %dx +# CHECK-NEXT: [0,2] .D==eER . . xorw %bx, %dx +# CHECK-NEXT: [1,0] .D===eER . . addw %cx, %dx +# CHECK-NEXT: [1,1] . D===eER . . movw %ax, %dx +# CHECK-NEXT: [1,2] . D====eER. . xorw %bx, %dx +# CHECK-NEXT: [2,0] . D=====eER . addw %cx, %dx +# CHECK-NEXT: [2,1] . D======eER. movw %ax, %dx +# CHECK-NEXT: [2,2] . 
D======eER xorw %bx, %dx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-4.s @@ -12,7 +12,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 9753 +# CHECK-NEXT: Total Cycles: 9754 # CHECK-NEXT: Total uOps: 6000 # CHECK: Dispatch Width: 4 @@ -70,17 +70,17 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 01 +# CHECK-NEXT: Index 0123456789 012 -# CHECK: [0,0] DeeeeER . . .. imulw %ax, %bx -# CHECK-NEXT: [0,1] D===eeER . . .. lzcntw %ax, %bx -# CHECK-NEXT: [0,2] D=====eER . . .. addw %cx, %bx -# CHECK-NEXT: [1,0] .D======eeeeER . .. imulw %ax, %bx -# CHECK-NEXT: [1,1] .D=========eeER. .. lzcntw %ax, %bx -# CHECK-NEXT: [1,2] .D===========eER .. addw %cx, %bx -# CHECK-NEXT: [2,0] . D===========eeeeER.. imulw %ax, %bx -# CHECK-NEXT: [2,1] . D==============eeER. lzcntw %ax, %bx -# CHECK-NEXT: [2,2] . D================eER addw %cx, %bx +# CHECK: [0,0] .DeeeeER . . . . imulw %ax, %bx +# CHECK-NEXT: [0,1] .D===eeER . . . . lzcntw %ax, %bx +# CHECK-NEXT: [0,2] .D=====eER. . . . addw %cx, %bx +# CHECK-NEXT: [1,0] . D======eeeeER. . . imulw %ax, %bx +# CHECK-NEXT: [1,1] . D=========eeER . . lzcntw %ax, %bx +# CHECK-NEXT: [1,2] . D===========eER . . addw %cx, %bx +# CHECK-NEXT: [2,0] . D===========eeeeER . imulw %ax, %bx +# CHECK-NEXT: [2,1] . D==============eeER. lzcntw %ax, %bx +# CHECK-NEXT: [2,2] . 
D================eER addw %cx, %bx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-5.s @@ -7,7 +7,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 1500 -# CHECK-NEXT: Total Cycles: 3003 +# CHECK-NEXT: Total Cycles: 3004 # CHECK-NEXT: Total uOps: 3000 # CHECK: Dispatch Width: 4 @@ -60,11 +60,11 @@ # CHECK-NEXT: - - - - - 2.00 - - - - - - - - - - - - - - - - - lzcntw %ax, %bx # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . lzcntw %ax, %bx -# CHECK-NEXT: [1,0] D==eeER . lzcntw %ax, %bx -# CHECK-NEXT: [2,0] .D===eeER lzcntw %ax, %bx +# CHECK: [0,0] .DeeER . lzcntw %ax, %bx +# CHECK-NEXT: [1,0] .D==eeER . lzcntw %ax, %bx +# CHECK-NEXT: [2,0] . D===eeER lzcntw %ax, %bx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update-6.s @@ -13,7 +13,7 @@ # CHECK: Iterations: 1500 # CHECK-NEXT: Instructions: 4500 -# CHECK-NEXT: Total Cycles: 12003 +# CHECK-NEXT: Total Cycles: 12004 # CHECK-NEXT: Total uOps: 7500 # CHECK: Dispatch Width: 4 @@ -71,17 +71,17 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 +# CHECK-NEXT: Index 0123456789 01234567 -# CHECK: [0,0] DeeeeER . . . .. imull %edx, %ecx -# CHECK-NEXT: [0,1] DeeeeeeER . . . .. lzcntw (%rsp), %cx -# CHECK-NEXT: [0,2] .D=eeeeeeER . . .. lzcntw 2(%rsp), %cx -# CHECK-NEXT: [1,0] .D=======eeeeER. . .. 
imull %edx, %ecx -# CHECK-NEXT: [1,1] . D======eeeeeeER . .. lzcntw (%rsp), %cx -# CHECK-NEXT: [1,2] . D========eeeeeeER . .. lzcntw 2(%rsp), %cx -# CHECK-NEXT: [2,0] . D=============eeeeER .. imull %edx, %ecx -# CHECK-NEXT: [2,1] . D=============eeeeeeER.. lzcntw (%rsp), %cx -# CHECK-NEXT: [2,2] . D==============eeeeeeER lzcntw 2(%rsp), %cx +# CHECK: [0,0] .DeeeeER . . . . . imull %edx, %ecx +# CHECK-NEXT: [0,1] .DeeeeeeER. . . . . lzcntw (%rsp), %cx +# CHECK-NEXT: [0,2] . D=eeeeeeER . . . . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [1,0] . D=======eeeeER . . . imull %edx, %ecx +# CHECK-NEXT: [1,1] . D======eeeeeeER . . . lzcntw (%rsp), %cx +# CHECK-NEXT: [1,2] . D========eeeeeeER. . . lzcntw 2(%rsp), %cx +# CHECK-NEXT: [2,0] . D=============eeeeER . . imull %edx, %ecx +# CHECK-NEXT: [2,1] . D=============eeeeeeER . lzcntw (%rsp), %cx +# CHECK-NEXT: [2,2] . D==============eeeeeeER lzcntw 2(%rsp), %cx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/partial-reg-update.s @@ -7,12 +7,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 -# CHECK-NEXT: Total Cycles: 9 +# CHECK-NEXT: Total Cycles: 10 # CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.33 -# CHECK-NEXT: IPC: 0.33 +# CHECK-NEXT: uOps Per Cycle: 0.30 +# CHECK-NEXT: IPC: 0.30 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -29,11 +29,11 @@ # CHECK-NEXT: 1 1 1.00 addl %ecx, %ebx # CHECK: Timeline view: -# CHECK-NEXT: Index 012345678 +# CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . imulw %ax, %cx -# CHECK-NEXT: [0,1] D====eER. addb %al, %cl -# CHECK-NEXT: [0,2] D=====eER addl %ecx, %ebx +# CHECK: [0,0] .DeeeeER . imulw %ax, %cx +# CHECK-NEXT: [0,1] .D====eER. 
addb %al, %cl +# CHECK-NEXT: [0,2] .D=====eER addl %ecx, %ebx # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s b/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/pipes-fpu.s @@ -19,11 +19,11 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 1503 +# CHECK-NEXT: Total Cycles: 1508 # CHECK-NEXT: Total uOps: 1500 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.00 +# CHECK-NEXT: uOps Per Cycle: 0.99 # CHECK-NEXT: IPC: 0.53 # CHECK-NEXT: Block RThroughput: 15.0 @@ -72,39 +72,39 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] -# CHECK-NEXT: - - - - - - - 1.00 15.06 14.94 1.12 1.88 9.00 1.00 6.44 4.56 - - - - - - - +# CHECK-NEXT: - - - - - - - 1.00 15.09 14.91 1.10 1.90 9.00 1.00 6.00 5.00 - - - - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7.0] [7.1] [8.0] [8.1] [9] [10] [11] [12] [13] [14] [15] [16.0] [16.1] [17] [18] Instructions: -# CHECK-NEXT: - - - - - - - - - - 0.88 0.12 2.00 - 2.00 1.00 - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - - - 0.24 1.76 - - 0.44 0.56 - - - - - - - vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.90 0.10 2.00 - 2.00 1.00 - - - - - - - vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - 0.20 1.80 - - - 1.00 - - - - - - - vpand %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - - - - 1.00 1.00 - - - - - - - - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - - - - - - 7.00 - 1.00 - - - - - - - - vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 0.52 0.48 - - - - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - - - - - - - 4.50 
4.50 - - - - - 1.00 - - - - - - - vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: - - - - - - - - 1.04 0.96 - - - - 1.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - - - - - - - 0.50 0.50 - - - - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - 4.59 4.41 - - - - - 1.00 - - - - - - - vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: - - - - - - - - 1.00 1.00 - - - - 1.00 - - - - - - - - vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - - 9.00 9.00 - - - - - 2.00 - - - - - - - vsqrtps %ymm0, %ymm2 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 012 +# CHECK-NEXT: 0123456789 01234567 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeER . . . . . . vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,1] D=eeE--R . . . . . . vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] D==eeeeER . . . . . . vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: [0,3] .D==eeeeeeeeeeeeER . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,4] . D===================eeeeeER . . vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [0,5] . DeeeeeeeeeE---------------R . . vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: [0,6] . D===================eeeeeER. . vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,7] . DeeeeeeeeeE---------------R. . vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: [1,0] . D======eeeeeE------------R. . vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,1] . DeeE---------------------R. . vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] . D=eeeeE-------------------R . vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: [1,3] . D=======eeeeeeeeeeeeE----R . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,4] . .D==================eeeeeER. vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: [1,5] . .D=====eeeeeeeeeE---------R. vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: [1,6] . . D==================eeeeeER vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [1,7] . . D=============eeeeeeeeeE-R vsqrtps %ymm0, %ymm2 +# CHECK: [0,0] .DeeeeeER . . . . . . . vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,1] .D=eeE--R . . . . . . . 
vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] .D==eeeeER. . . . . . . vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: [0,3] . DeeeeeeeeeeeeER . . . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,4] . DeeeeeE------R . . . . . vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,5] . DeeeeeeeeeE--R . . . . . vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: [0,6] . .DeeeeeE-----R . . . . . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,7] . .D==eeeeeeeeeER. . . . . vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: [1,0] . . D====eeeeeE-R. . . . . vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] . . DeeE--------R. . . . . vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . . DeeeeE------R. . . . . vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: [1,3] . . D===eeeeeeeeeeeeER . . . vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,4] . . .D===eeeeeE------R . . . vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,5] . . .D====eeeeeeeeeE-R . . . vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: [1,6] . . . D============eeeeeER . . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,7] . . . D==============eeeeeeeeeER vsqrtps %ymm0, %ymm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -113,12 +113,12 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 2 4.0 4.0 6.0 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1. 2 1.5 1.5 11.5 vpand %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 2. 2 2.5 2.5 9.5 vcvttps2dq %xmm0, %xmm2 -# CHECK-NEXT: 3. 2 5.5 5.5 2.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 4. 2 19.5 19.5 0.0 vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5. 2 3.5 3.5 12.0 vsqrtps %xmm0, %xmm2 -# CHECK-NEXT: 6. 2 19.5 19.5 0.0 vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 7. 2 7.5 7.5 8.0 vsqrtps %ymm0, %ymm2 -# CHECK-NEXT: 2 7.9 7.9 6.1 +# CHECK-NEXT: 0. 2 3.0 3.0 0.5 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 2 1.5 1.5 5.0 vpand %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2. 2 2.0 2.0 3.0 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 3. 
2 2.5 2.5 0.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 4. 2 2.5 2.5 6.0 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 5. 2 3.0 3.0 1.5 vsqrtps %xmm0, %xmm2 +# CHECK-NEXT: 6. 2 7.0 7.0 2.5 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 7. 2 9.0 9.0 0.0 vsqrtps %ymm0, %ymm2 +# CHECK-NEXT: 2 3.8 3.8 2.3 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s b/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/pr37790.s @@ -6,7 +6,7 @@ # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 4 -# CHECK-NEXT: Total Cycles: 205 +# CHECK-NEXT: Total Cycles: 206 # CHECK-NEXT: Total uOps: 6 # CHECK: Dispatch Width: 4 @@ -30,8 +30,7 @@ # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 # CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123 -# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER. int3 -# CHECK-NEXT: [0,1] D====================================================================================================eER stmxcsr (%rsp) +# CHECK: [0,0] .DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s b/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/rank.s @@ -12,7 +12,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 800 -# CHECK-NEXT: Total Cycles: 803 +# CHECK-NEXT: Total Cycles: 804 # CHECK-NEXT: Total uOps: 800 # CHECK: Dispatch Width: 4 @@ -80,32 +80,32 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 -# CHECK-NEXT: Index 0123456789 0123456 +# CHECK-NEXT: Index 0123456789 01234567 -# CHECK: [0,0] DeER . . . . .. addl %eax, %ecx -# CHECK-NEXT: [0,1] DeER . . . . .. 
addl %eax, %edx -# CHECK-NEXT: [0,2] D==eER . . . .. addl %eax, %ebx -# CHECK-NEXT: [0,3] D==eER . . . .. addl %edx, %esi -# CHECK-NEXT: [0,4] .D===eER . . . .. addl %ebx, %eax -# CHECK-NEXT: [0,5] .D===eER . . . .. addl %edx, %esi -# CHECK-NEXT: [0,6] .D=====eER. . . .. addl %ebx, %eax -# CHECK-NEXT: [0,7] .D======eER . . .. addl %ebx, %eax -# CHECK-NEXT: [1,0] . D========eER . . .. addl %eax, %ecx -# CHECK-NEXT: [1,1] . D======eE--R . . .. addl %eax, %edx -# CHECK-NEXT: [1,2] . D=======eE-R . . .. addl %eax, %ebx -# CHECK-NEXT: [1,3] . D=========eER. . .. addl %edx, %esi -# CHECK-NEXT: [1,4] . D=========eER . .. addl %ebx, %eax -# CHECK-NEXT: [1,5] . D==========eER . .. addl %edx, %esi -# CHECK-NEXT: [1,6] . D===========eER . .. addl %ebx, %eax -# CHECK-NEXT: [1,7] . D============eER . .. addl %ebx, %eax -# CHECK-NEXT: [2,0] . D==============eER .. addl %eax, %ecx -# CHECK-NEXT: [2,1] . D============eE--R .. addl %eax, %edx -# CHECK-NEXT: [2,2] . D=============eE-R .. addl %eax, %ebx -# CHECK-NEXT: [2,3] . D===============eER .. addl %edx, %esi -# CHECK-NEXT: [2,4] . D===============eER .. addl %ebx, %eax -# CHECK-NEXT: [2,5] . D================eER.. addl %edx, %esi -# CHECK-NEXT: [2,6] . D=================eER. addl %ebx, %eax -# CHECK-NEXT: [2,7] . D==================eER addl %ebx, %eax +# CHECK: [0,0] .DeER. . . . . . addl %eax, %ecx +# CHECK-NEXT: [0,1] .DeER. . . . . . addl %eax, %edx +# CHECK-NEXT: [0,2] .D==eER . . . . . addl %eax, %ebx +# CHECK-NEXT: [0,3] .D==eER . . . . . addl %edx, %esi +# CHECK-NEXT: [0,4] . D===eER . . . . . addl %ebx, %eax +# CHECK-NEXT: [0,5] . D===eER . . . . . addl %edx, %esi +# CHECK-NEXT: [0,6] . D=====eER . . . . addl %ebx, %eax +# CHECK-NEXT: [0,7] . D======eER . . . . addl %ebx, %eax +# CHECK-NEXT: [1,0] . D========eER. . . . addl %eax, %ecx +# CHECK-NEXT: [1,1] . D======eE--R. . . . addl %eax, %edx +# CHECK-NEXT: [1,2] . D=======eE-R. . . . addl %eax, %ebx +# CHECK-NEXT: [1,3] . D=========eER . . . 
addl %edx, %esi +# CHECK-NEXT: [1,4] . D=========eER . . . addl %ebx, %eax +# CHECK-NEXT: [1,5] . D==========eER . . . addl %edx, %esi +# CHECK-NEXT: [1,6] . D===========eER . . . addl %ebx, %eax +# CHECK-NEXT: [1,7] . D============eER. . . addl %ebx, %eax +# CHECK-NEXT: [2,0] . D==============eER . . addl %eax, %ecx +# CHECK-NEXT: [2,1] . D============eE--R . . addl %eax, %edx +# CHECK-NEXT: [2,2] . D=============eE-R . . addl %eax, %ebx +# CHECK-NEXT: [2,3] . D===============eER . . addl %edx, %esi +# CHECK-NEXT: [2,4] . .D===============eER. . addl %ebx, %eax +# CHECK-NEXT: [2,5] . .D================eER . addl %edx, %esi +# CHECK-NEXT: [2,6] . .D=================eER. addl %ebx, %eax +# CHECK-NEXT: [2,7] . .D==================eER addl %ebx, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s b/llvm/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/rcu-statistics.s @@ -20,12 +20,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 16 -# CHECK-NEXT: Total Cycles: 22 +# CHECK-NEXT: Total Cycles: 23 # CHECK-NEXT: Total uOps: 16 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.73 -# CHECK-NEXT: IPC: 0.73 +# CHECK-NEXT: uOps Per Cycle: 0.70 +# CHECK-NEXT: IPC: 0.70 # CHECK-NEXT: Block RThroughput: 15.0 # CHECK: Instruction Info: @@ -56,10 +56,10 @@ # CHECK: Retire Control Unit - number of cycles where we saw N instructions retired: # CHECK-NEXT: [# retired], [# cycles] -# CHECK-NEXT: 0, 11 (50.0%) -# CHECK-NEXT: 1, 9 (40.9%) -# CHECK-NEXT: 3, 1 (4.5%) -# CHECK-NEXT: 4, 1 (4.5%) +# CHECK-NEXT: 0, 12 (52.2%) +# CHECK-NEXT: 1, 9 (39.1%) +# CHECK-NEXT: 3, 1 (4.3%) +# CHECK-NEXT: 4, 1 (4.3%) # CHECK: Total ROB Entries: 128 # CHECK-NEXT: Max Used ROB Entries: 16 ( 12.5% ) diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s 
b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-1.s @@ -10,12 +10,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 13 +# CHECK-NEXT: Total Cycles: 14 # CHECK-NEXT: Total uOps: 2 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.15 -# CHECK-NEXT: IPC: 0.15 +# CHECK-NEXT: uOps Per Cycle: 0.14 +# CHECK-NEXT: IPC: 0.14 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: @@ -31,11 +31,11 @@ # CHECK-NEXT: 1 10 1.50 * vmulps (%rdi), %xmm1, %xmm2 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . vaddps %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] DeeeeeeeeeeER vmulps (%rdi), %xmm1, %xmm2 +# CHECK: [0,0] .DeeeeeER . . vaddps %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] .DeeeeeeeeeeER vmulps (%rdi), %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-2.s @@ -9,7 +9,7 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 15 +# CHECK-NEXT: Total Cycles: 16 # CHECK-NEXT: Total uOps: 2 # CHECK: Dispatch Width: 4 @@ -30,11 +30,11 @@ # CHECK-NEXT: 1 8 4.00 * imull (%rdi) # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeER . . imull %esi -# CHECK-NEXT: [0,1] D====eeeeeeeeER imull (%rdi) +# CHECK: [0,0] .DeeeeER . . 
imull %esi +# CHECK-NEXT: [0,1] .D====eeeeeeeeER imull (%rdi) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/read-advance-3.s @@ -7,12 +7,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 3 -# CHECK-NEXT: Total Cycles: 8 +# CHECK-NEXT: Total Cycles: 9 # CHECK-NEXT: Total uOps: 3 # CHECK: Dispatch Width: 3 -# CHECK-NEXT: uOps Per Cycle: 0.38 -# CHECK-NEXT: IPC: 0.38 +# CHECK-NEXT: uOps Per Cycle: 0.33 +# CHECK-NEXT: IPC: 0.33 # CHECK-NEXT: Block RThroughput: 3.0 # CHECK: Instruction Info: @@ -29,11 +29,11 @@ # CHECK-NEXT: 1 1 1.00 addq %rdx, %r8 # CHECK: Timeline view: -# CHECK-NEXT: Index 01234567 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . addq %rdi, %rsi -# CHECK-NEXT: [0,1] DeeeeeER addq (%rsp), %rsi -# CHECK-NEXT: [0,2] D==eE--R addq %rdx, %r8 +# CHECK: [0,0] .DeER. . 
addq %rdi, %rsi +# CHECK-NEXT: [0,1] .DeeeeeER addq (%rsp), %rsi +# CHECK-NEXT: [0,2] .D==eE--R addq %rdx, %r8 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-1.s @@ -10,12 +10,12 @@ # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 9 -# CHECK-NEXT: Total Cycles: 13 +# CHECK-NEXT: Total Cycles: 14 # CHECK-NEXT: Total uOps: 9 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.69 -# CHECK-NEXT: IPC: 0.69 +# CHECK-NEXT: uOps Per Cycle: 0.64 +# CHECK-NEXT: IPC: 0.64 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: @@ -81,18 +81,18 @@ # CHECK-NEXT: - - - - - - - - 0.67 0.33 - - - - 1.00 - - - - - - - - vaddps %xmm1, %xmm1, %xmm2 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . . vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [0,1] DeER . . . vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [0,2] D=eeeeeER . . vaddps %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [1,0] D-------R . . vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,1] .D=eE---R . . vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [1,2] .D===eeeeeER. vaddps %xmm1, %xmm1, %xmm2 -# CHECK-NEXT: [2,0] .D---------R. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,1] .D=eE------R. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [2,2] . D===eeeeeER vaddps %xmm1, %xmm1, %xmm2 +# CHECK: [0,0] .DR . . . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [0,1] .DeER. . . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [0,2] .D=eeeeeER. . vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [1,0] .D-------R. . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,1] . D=eE---R. . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [1,2] . D===eeeeeER. vaddps %xmm1, %xmm1, %xmm2 +# CHECK-NEXT: [2,0] . D---------R. 
vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,1] . D=eE------R. vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [2,2] . D===eeeeeER vaddps %xmm1, %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-2.s @@ -14,12 +14,12 @@ # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 27 -# CHECK-NEXT: Total Cycles: 17 +# CHECK-NEXT: Total Cycles: 18 # CHECK-NEXT: Total uOps: 27 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.59 -# CHECK-NEXT: IPC: 1.59 +# CHECK-NEXT: uOps Per Cycle: 1.50 +# CHECK-NEXT: IPC: 1.50 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -97,36 +97,36 @@ # CHECK-NEXT: - - - - - - - - - - 0.67 1.33 - - 0.33 0.67 - - - - - - - movdqu %xmm5, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . .. pxor %mm0, %mm0 -# CHECK-NEXT: [0,1] DeeER. . .. movq %mm0, %mm1 -# CHECK-NEXT: [0,2] D---R. . .. xorps %xmm0, %xmm0 -# CHECK-NEXT: [0,3] DeE-R. . .. movaps %xmm0, %xmm1 -# CHECK-NEXT: [0,4] .DeER. . .. movups %xmm1, %xmm2 -# CHECK-NEXT: [0,5] .D=eER . .. movapd %xmm2, %xmm3 -# CHECK-NEXT: [0,6] .D==eER . .. movupd %xmm3, %xmm4 -# CHECK-NEXT: [0,7] .D===eER . .. movdqa %xmm4, %xmm5 -# CHECK-NEXT: [0,8] . D===eER . .. movdqu %xmm5, %xmm0 -# CHECK-NEXT: [1,0] . D-----R . .. pxor %mm0, %mm0 -# CHECK-NEXT: [1,1] . DeeE--R . .. movq %mm0, %mm1 -# CHECK-NEXT: [1,2] . D-----R . .. xorps %xmm0, %xmm0 -# CHECK-NEXT: [1,3] . D=eE--R. .. movaps %xmm0, %xmm1 -# CHECK-NEXT: [1,4] . D==eE-R. .. movups %xmm1, %xmm2 -# CHECK-NEXT: [1,5] . D===eER. .. movapd %xmm2, %xmm3 -# CHECK-NEXT: [1,6] . D====eER .. movupd %xmm3, %xmm4 -# CHECK-NEXT: [1,7] . D====eER .. 
movdqa %xmm4, %xmm5 -# CHECK-NEXT: [1,8] . D=====eER .. movdqu %xmm5, %xmm0 -# CHECK-NEXT: [2,0] . D-------R .. pxor %mm0, %mm0 -# CHECK-NEXT: [2,1] . D==eeE--R .. movq %mm0, %mm1 -# CHECK-NEXT: [2,2] . D------R .. xorps %xmm0, %xmm0 -# CHECK-NEXT: [2,3] . D===eE--R .. movaps %xmm0, %xmm1 -# CHECK-NEXT: [2,4] . D====eE-R .. movups %xmm1, %xmm2 -# CHECK-NEXT: [2,5] . D=====eER .. movapd %xmm2, %xmm3 -# CHECK-NEXT: [2,6] . .D=====eER.. movupd %xmm3, %xmm4 -# CHECK-NEXT: [2,7] . .D======eER. movdqa %xmm4, %xmm5 -# CHECK-NEXT: [2,8] . .D=======eER movdqu %xmm5, %xmm0 +# CHECK: [0,0] .DR . . . . pxor %mm0, %mm0 +# CHECK-NEXT: [0,1] .DeeER . . . movq %mm0, %mm1 +# CHECK-NEXT: [0,2] .D---R . . . xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,3] .DeE-R . . . movaps %xmm0, %xmm1 +# CHECK-NEXT: [0,4] . DeER . . . movups %xmm1, %xmm2 +# CHECK-NEXT: [0,5] . D=eER . . . movapd %xmm2, %xmm3 +# CHECK-NEXT: [0,6] . D==eER . . . movupd %xmm3, %xmm4 +# CHECK-NEXT: [0,7] . D===eER . . . movdqa %xmm4, %xmm5 +# CHECK-NEXT: [0,8] . D===eER. . . movdqu %xmm5, %xmm0 +# CHECK-NEXT: [1,0] . D-----R. . . pxor %mm0, %mm0 +# CHECK-NEXT: [1,1] . DeeE--R. . . movq %mm0, %mm1 +# CHECK-NEXT: [1,2] . D-----R. . . xorps %xmm0, %xmm0 +# CHECK-NEXT: [1,3] . D=eE--R . . movaps %xmm0, %xmm1 +# CHECK-NEXT: [1,4] . D==eE-R . . movups %xmm1, %xmm2 +# CHECK-NEXT: [1,5] . D===eER . . movapd %xmm2, %xmm3 +# CHECK-NEXT: [1,6] . D====eER . . movupd %xmm3, %xmm4 +# CHECK-NEXT: [1,7] . D====eER . . movdqa %xmm4, %xmm5 +# CHECK-NEXT: [1,8] . D=====eER . . movdqu %xmm5, %xmm0 +# CHECK-NEXT: [2,0] . D-------R . . pxor %mm0, %mm0 +# CHECK-NEXT: [2,1] . D==eeE--R . . movq %mm0, %mm1 +# CHECK-NEXT: [2,2] . .D------R . . xorps %xmm0, %xmm0 +# CHECK-NEXT: [2,3] . .D===eE--R. . movaps %xmm0, %xmm1 +# CHECK-NEXT: [2,4] . .D====eE-R. . movups %xmm1, %xmm2 +# CHECK-NEXT: [2,5] . .D=====eER. . movapd %xmm2, %xmm3 +# CHECK-NEXT: [2,6] . . D=====eER . movupd %xmm3, %xmm4 +# CHECK-NEXT: [2,7] . . D======eER. 
movdqa %xmm4, %xmm5 +# CHECK-NEXT: [2,8] . . D=======eER movdqu %xmm5, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-3.s @@ -11,12 +11,12 @@ # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 21 -# CHECK-NEXT: Total Cycles: 17 +# CHECK-NEXT: Total Cycles: 18 # CHECK-NEXT: Total uOps: 21 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.24 -# CHECK-NEXT: IPC: 1.24 +# CHECK-NEXT: uOps Per Cycle: 1.17 +# CHECK-NEXT: IPC: 1.17 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -90,30 +90,30 @@ # CHECK-NEXT: - - - - - - - - - - 2.00 - - - 0.33 0.67 - - - - - - - vmovdqu %xmm5, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456 +# CHECK-NEXT: 01234567 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . .. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [0,1] DeER . . .. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [0,2] D=eER. . .. vmovups %xmm1, %xmm2 -# CHECK-NEXT: [0,3] D==eER . .. vmovapd %xmm2, %xmm3 -# CHECK-NEXT: [0,4] .D==eER . .. vmovupd %xmm3, %xmm4 -# CHECK-NEXT: [0,5] .D===eER . .. vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: [0,6] .D====eER . .. vmovdqu %xmm5, %xmm0 -# CHECK-NEXT: [1,0] .D------R . .. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,1] . D==eE-R . .. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [1,2] . D===eER . .. vmovups %xmm1, %xmm2 -# CHECK-NEXT: [1,3] . D====eER. .. vmovapd %xmm2, %xmm3 -# CHECK-NEXT: [1,4] . D=====eER .. vmovupd %xmm3, %xmm4 -# CHECK-NEXT: [1,5] . D=====eER .. vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: [1,6] . D======eER .. vmovdqu %xmm5, %xmm0 -# CHECK-NEXT: [2,0] . D--------R .. vxorps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,1] . D=====eE-R .. vmovaps %xmm0, %xmm1 -# CHECK-NEXT: [2,2] . D=====eER .. 
vmovups %xmm1, %xmm2 -# CHECK-NEXT: [2,3] . D======eER .. vmovapd %xmm2, %xmm3 -# CHECK-NEXT: [2,4] . D=======eER.. vmovupd %xmm3, %xmm4 -# CHECK-NEXT: [2,5] . D========eER. vmovdqa %xmm4, %xmm5 -# CHECK-NEXT: [2,6] . D========eER vmovdqu %xmm5, %xmm0 +# CHECK: [0,0] .DR . . . . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [0,1] .DeER. . . . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [0,2] .D=eER . . . vmovups %xmm1, %xmm2 +# CHECK-NEXT: [0,3] .D==eER . . . vmovapd %xmm2, %xmm3 +# CHECK-NEXT: [0,4] . D==eER . . . vmovupd %xmm3, %xmm4 +# CHECK-NEXT: [0,5] . D===eER . . . vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: [0,6] . D====eER. . . vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: [1,0] . D------R. . . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,1] . D==eE-R. . . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [1,2] . D===eER. . . vmovups %xmm1, %xmm2 +# CHECK-NEXT: [1,3] . D====eER . . vmovapd %xmm2, %xmm3 +# CHECK-NEXT: [1,4] . D=====eER . . vmovupd %xmm3, %xmm4 +# CHECK-NEXT: [1,5] . D=====eER . . vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: [1,6] . D======eER . . vmovdqu %xmm5, %xmm0 +# CHECK-NEXT: [2,0] . D--------R . . vxorps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,1] . D=====eE-R . . vmovaps %xmm0, %xmm1 +# CHECK-NEXT: [2,2] . D=====eER . . vmovups %xmm1, %xmm2 +# CHECK-NEXT: [2,3] . D======eER. . vmovapd %xmm2, %xmm3 +# CHECK-NEXT: [2,4] . D=======eER . vmovupd %xmm3, %xmm4 +# CHECK-NEXT: [2,5] . D========eER. vmovdqa %xmm4, %xmm5 +# CHECK-NEXT: [2,6] . 
.D========eER vmovdqu %xmm5, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-4.s @@ -9,12 +9,12 @@ # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 15 -# CHECK-NEXT: Total Cycles: 15 +# CHECK-NEXT: Total Cycles: 16 # CHECK-NEXT: Total uOps: 15 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 0.94 +# CHECK-NEXT: IPC: 0.94 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -84,24 +84,24 @@ # CHECK-NEXT: - - - - - 2.00 - - - - - - - - - - - - - - - - - movl %edx, %eax # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . . xorl %eax, %eax -# CHECK-NEXT: [0,1] DeER . . . movl %eax, %ebx -# CHECK-NEXT: [0,2] D=eER. . . movl %ebx, %ecx -# CHECK-NEXT: [0,3] D==eER . . movl %ecx, %edx -# CHECK-NEXT: [0,4] .D==eER . . movl %edx, %eax -# CHECK-NEXT: [1,0] .D----R . . xorl %eax, %eax -# CHECK-NEXT: [1,1] .D===eER . . movl %eax, %ebx -# CHECK-NEXT: [1,2] .D====eER . . movl %ebx, %ecx -# CHECK-NEXT: [1,3] . D====eER. . movl %ecx, %edx -# CHECK-NEXT: [1,4] . D=====eER . movl %edx, %eax -# CHECK-NEXT: [2,0] . D-------R . xorl %eax, %eax -# CHECK-NEXT: [2,1] . D======eER . movl %eax, %ebx -# CHECK-NEXT: [2,2] . D======eER . movl %ebx, %ecx -# CHECK-NEXT: [2,3] . D=======eER. movl %ecx, %edx -# CHECK-NEXT: [2,4] . D========eER movl %edx, %eax +# CHECK: [0,0] .DR . . . xorl %eax, %eax +# CHECK-NEXT: [0,1] .DeER. . . movl %eax, %ebx +# CHECK-NEXT: [0,2] .D=eER . . movl %ebx, %ecx +# CHECK-NEXT: [0,3] .D==eER . . movl %ecx, %edx +# CHECK-NEXT: [0,4] . D==eER . . movl %edx, %eax +# CHECK-NEXT: [1,0] . D----R . . 
xorl %eax, %eax +# CHECK-NEXT: [1,1] . D===eER . . movl %eax, %ebx +# CHECK-NEXT: [1,2] . D====eER. . movl %ebx, %ecx +# CHECK-NEXT: [1,3] . D====eER . movl %ecx, %edx +# CHECK-NEXT: [1,4] . D=====eER . movl %edx, %eax +# CHECK-NEXT: [2,0] . D-------R . xorl %eax, %eax +# CHECK-NEXT: [2,1] . D======eER . movl %eax, %ebx +# CHECK-NEXT: [2,2] . D======eER . movl %ebx, %ecx +# CHECK-NEXT: [2,3] . D=======eER. movl %ecx, %edx +# CHECK-NEXT: [2,4] . D========eER movl %edx, %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/reg-move-elimination-5.s @@ -9,12 +9,12 @@ # CHECK: Iterations: 3 # CHECK-NEXT: Instructions: 15 -# CHECK-NEXT: Total Cycles: 15 +# CHECK-NEXT: Total Cycles: 16 # CHECK-NEXT: Total uOps: 15 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.00 -# CHECK-NEXT: IPC: 1.00 +# CHECK-NEXT: uOps Per Cycle: 0.94 +# CHECK-NEXT: IPC: 0.94 # CHECK-NEXT: Block RThroughput: 4.0 # CHECK: Instruction Info: @@ -84,24 +84,24 @@ # CHECK-NEXT: - - - - - 2.00 - - - - - - - - - - - - - - - - - movq %rdx, %rax # CHECK: Timeline view: -# CHECK-NEXT: 01234 +# CHECK-NEXT: 012345 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DR . . . xorq %rax, %rax -# CHECK-NEXT: [0,1] DeER . . . movq %rax, %rbx -# CHECK-NEXT: [0,2] D=eER. . . movq %rbx, %rcx -# CHECK-NEXT: [0,3] D==eER . . movq %rcx, %rdx -# CHECK-NEXT: [0,4] .D==eER . . movq %rdx, %rax -# CHECK-NEXT: [1,0] .D----R . . xorq %rax, %rax -# CHECK-NEXT: [1,1] .D===eER . . movq %rax, %rbx -# CHECK-NEXT: [1,2] .D====eER . . movq %rbx, %rcx -# CHECK-NEXT: [1,3] . D====eER. . movq %rcx, %rdx -# CHECK-NEXT: [1,4] . D=====eER . movq %rdx, %rax -# CHECK-NEXT: [2,0] . D-------R . xorq %rax, %rax -# CHECK-NEXT: [2,1] . D======eER . 
movq %rax, %rbx -# CHECK-NEXT: [2,2] . D======eER . movq %rbx, %rcx -# CHECK-NEXT: [2,3] . D=======eER. movq %rcx, %rdx -# CHECK-NEXT: [2,4] . D========eER movq %rdx, %rax +# CHECK: [0,0] .DR . . . xorq %rax, %rax +# CHECK-NEXT: [0,1] .DeER. . . movq %rax, %rbx +# CHECK-NEXT: [0,2] .D=eER . . movq %rbx, %rcx +# CHECK-NEXT: [0,3] .D==eER . . movq %rcx, %rdx +# CHECK-NEXT: [0,4] . D==eER . . movq %rdx, %rax +# CHECK-NEXT: [1,0] . D----R . . xorq %rax, %rax +# CHECK-NEXT: [1,1] . D===eER . . movq %rax, %rbx +# CHECK-NEXT: [1,2] . D====eER. . movq %rbx, %rcx +# CHECK-NEXT: [1,3] . D====eER . movq %rcx, %rdx +# CHECK-NEXT: [1,4] . D=====eER . movq %rdx, %rax +# CHECK-NEXT: [2,0] . D-------R . xorq %rax, %rax +# CHECK-NEXT: [2,1] . D======eER . movq %rax, %rbx +# CHECK-NEXT: [2,2] . D======eER . movq %rbx, %rcx +# CHECK-NEXT: [2,3] . D=======eER. movq %rcx, %rdx +# CHECK-NEXT: [2,4] . D========eER movq %rdx, %rax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-1.s @@ -6,7 +6,7 @@ # CHECK: Iterations: 5 # CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 53 +# CHECK-NEXT: Total Cycles: 54 # CHECK-NEXT: Total uOps: 10 # CHECK: Dispatch Width: 4 @@ -24,9 +24,9 @@ # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 50 (94.3%) +# CHECK-NEXT: 0, 51 (94.4%) # CHECK-NEXT: 2, 1 (1.9%) -# CHECK-NEXT: 4, 2 (3.8%) +# CHECK-NEXT: 4, 2 (3.7%) # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 10 @@ -77,19 +77,19 @@ # CHECK-NEXT: - - - - - - - - 1.00 - - - - - - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 012 +# 
CHECK-NEXT: 0123456789 0123456789 0123 # CHECK-NEXT: Index 0123456789 0123456789 0123456789 -# CHECK: [0,0] DeeeeeER . . . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,0] D==========eeeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,1] D===============eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,0] .D===================eeeeeER . . . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,1] .D========================eeeeeER . . . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [3,0] .D=============================eeeeeER . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [3,1] .D==================================eeeeeER . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [4,0] . D======================================eeeeeER . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [4,1] . D===========================================eeeeeER vmulps %xmm0, %xmm0, %xmm0 +# CHECK: [0,0] .DeeeeeER . . . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [0,1] .D=====eeeeeER . . . . . . . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,0] .D==========eeeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,1] .D===============eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,0] . D===================eeeeeER . . . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,1] . D========================eeeeeER . . . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [3,0] . D=============================eeeeeER . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [3,1] . D==================================eeeeeER . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [4,0] . D======================================eeeeeER . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [4,1] . 
D===========================================eeeeeER vmulps %xmm0, %xmm0, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-2.s @@ -6,7 +6,7 @@ # CHECK: Iterations: 5 # CHECK-NEXT: Instructions: 10 -# CHECK-NEXT: Total Cycles: 53 +# CHECK-NEXT: Total Cycles: 54 # CHECK-NEXT: Total uOps: 10 # CHECK: Dispatch Width: 4 @@ -15,7 +15,7 @@ # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 26 (49.1%) +# CHECK-NEXT: RAT - Register unavailable: 26 (48.1%) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 @@ -24,8 +24,8 @@ # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 46 (86.8%) -# CHECK-NEXT: 1, 6 (11.3%) +# CHECK-NEXT: 0, 47 (87.0%) +# CHECK-NEXT: 1, 6 (11.1%) # CHECK-NEXT: 4, 1 (1.9%) # CHECK: Register File statistics: @@ -77,19 +77,19 @@ # CHECK-NEXT: - - - - - - - - 1.00 - - - - - - 1.00 - - - - - - - vmulps %xmm0, %xmm0, %xmm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0123456789 012 +# CHECK-NEXT: 0123456789 0123456789 0123 # CHECK-NEXT: Index 0123456789 0123456789 0123456789 -# CHECK: [0,0] DeeeeeER . . . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [0,1] D=====eeeeeER . . . . . . . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,0] D==========eeeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [1,1] D===============eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,0] .D===================eeeeeER . . . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [2,1] . . D==================eeeeeER . . . . . 
vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [3,0] . . . D==================eeeeeER . . . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [3,1] . . . . D==================eeeeeER . . . vmulps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [4,0] . . . . . D==================eeeeeER . . vaddps %xmm0, %xmm0, %xmm0 -# CHECK-NEXT: [4,1] . . . . . . D==================eeeeeER vmulps %xmm0, %xmm0, %xmm0 +# CHECK: [0,0] .DeeeeeER . . . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [0,1] .D=====eeeeeER . . . . . . . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,0] .D==========eeeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [1,1] .D===============eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,0] . D===================eeeeeER . . . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [2,1] . . D==================eeeeeER . . . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [3,0] . . . D==================eeeeeER . . . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [3,1] . . . . D==================eeeeeER . . . vmulps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [4,0] . . . . . D==================eeeeeER . . vaddps %xmm0, %xmm0, %xmm0 +# CHECK-NEXT: [4,1] . . . . . . 
D==================eeeeeER vmulps %xmm0, %xmm0, %xmm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-3.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-3.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-3.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-3.s @@ -5,11 +5,11 @@ # CHECK: Iterations: 2 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 42 +# CHECK-NEXT: Total Cycles: 43 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.10 +# CHECK-NEXT: uOps Per Cycle: 0.09 # CHECK-NEXT: IPC: 0.05 # CHECK-NEXT: Block RThroughput: 25.0 @@ -25,7 +25,7 @@ # CHECK-NEXT: 2 14 25.00 U idivl %eax # CHECK: Dynamic Dispatch Stall Cycles: -# CHECK-NEXT: RAT - Register unavailable: 16 (38.1%) +# CHECK-NEXT: RAT - Register unavailable: 16 (37.2%) # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 @@ -34,8 +34,8 @@ # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 40 (95.2%) -# CHECK-NEXT: 2, 2 (4.8%) +# CHECK-NEXT: 0, 41 (95.3%) +# CHECK-NEXT: 2, 2 (4.7%) # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 6 @@ -86,10 +86,10 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 01 +# CHECK-NEXT: Index 0123456789 0123456789 012 -# CHECK: [0,0] DeeeeeeeeeeeeeeER . . . . .. idivl %eax -# CHECK-NEXT: [1,0] . . . .D=========eeeeeeeeeeeeeeER idivl %eax +# CHECK: [0,0] .DeeeeeeeeeeeeeeER . . . . . . idivl %eax +# CHECK-NEXT: [1,0] . . . . 
D=========eeeeeeeeeeeeeeER idivl %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-4.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-4.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-4.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-4.s @@ -5,7 +5,7 @@ # CHECK: Iterations: 22 # CHECK-NEXT: Instructions: 22 -# CHECK-NEXT: Total Cycles: 542 +# CHECK-NEXT: Total Cycles: 543 # CHECK-NEXT: Total uOps: 44 # CHECK: Dispatch Width: 4 @@ -34,7 +34,7 @@ # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 531 (98.0%) +# CHECK-NEXT: 0, 532 (98.0%) # CHECK-NEXT: 4, 11 (2.0%) # CHECK: Register File statistics: @@ -53,11 +53,11 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 01234567 -# CHECK: [0,0] DeeeeeeeeeeeeeeER . . . . . . . . . .. idivl %eax -# CHECK-NEXT: [1,0] D=========================eeeeeeeeeeeeeeER . . . . .. idivl %eax -# CHECK-NEXT: [2,0] .D=================================================eeeeeeeeeeeeeeER idivl %eax +# CHECK: [0,0] .DeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax +# CHECK-NEXT: [1,0] .D=========================eeeeeeeeeeeeeeER . . . . . . idivl %eax +# CHECK-NEXT: [2,0] . 
D=================================================eeeeeeeeeeeeeeER idivl %eax # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/register-files-5.s @@ -37,12 +37,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 33 -# CHECK-NEXT: Total Cycles: 48 +# CHECK-NEXT: Total Cycles: 49 # CHECK-NEXT: Total uOps: 66 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.38 -# CHECK-NEXT: IPC: 0.69 +# CHECK-NEXT: uOps Per Cycle: 1.35 +# CHECK-NEXT: IPC: 0.67 # CHECK-NEXT: Block RThroughput: 41.0 # CHECK: Dynamic Dispatch Stall Cycles: @@ -55,9 +55,9 @@ # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 31 (64.6%) -# CHECK-NEXT: 2, 1 (2.1%) -# CHECK-NEXT: 4, 16 (33.3%) +# CHECK-NEXT: 0, 32 (65.3%) +# CHECK-NEXT: 2, 1 (2.0%) +# CHECK-NEXT: 4, 16 (32.7%) # CHECK: Register File statistics: # CHECK-NEXT: Total number of mappings created: 66 @@ -75,41 +75,41 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 01234567 +# CHECK-NEXT: Index 0123456789 0123456789 012345678 -# CHECK: [0,0] DeeeeeeeeeER . . . . . . . . vdivps %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [0,1] DeeeeeE----R . . . . . . . . vaddps %ymm0, %ymm0, %ymm2 -# CHECK-NEXT: [0,2] .D=eeeeeE--R . . . . . . . . vaddps %ymm0, %ymm0, %ymm3 -# CHECK-NEXT: [0,3] .D===eeeeeER . . . . . . . . vaddps %ymm0, %ymm0, %ymm4 -# CHECK-NEXT: [0,4] . D====eeeeeER . . . . . . . . vaddps %ymm0, %ymm0, %ymm5 -# CHECK-NEXT: [0,5] . D======eeeeeER . . . . . . . vaddps %ymm0, %ymm0, %ymm6 -# CHECK-NEXT: [0,6] . D=======eeeeeER . . . . . . . vaddps %ymm0, %ymm0, %ymm7 -# CHECK-NEXT: [0,7] . D===========eeeeeER . . . . . . 
vaddps %ymm0, %ymm0, %ymm8 -# CHECK-NEXT: [0,8] . D============eeeeeER . . . . . . vaddps %ymm0, %ymm0, %ymm9 -# CHECK-NEXT: [0,9] . D==============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm10 -# CHECK-NEXT: [0,10] . D==============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm11 -# CHECK-NEXT: [0,11] . D===============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm12 -# CHECK-NEXT: [0,12] . .D===============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm13 -# CHECK-NEXT: [0,13] . .D=================eeeeeER . . . . vaddps %ymm0, %ymm0, %ymm14 -# CHECK-NEXT: [0,14] . . D=================eeeeeER . . . . vaddps %ymm0, %ymm0, %ymm15 -# CHECK-NEXT: [0,15] . . D=====eeeeeE------------R . . . . vaddps %ymm2, %ymm0, %ymm0 -# CHECK-NEXT: [0,16] . . D==============eeeeeE--R . . . . vaddps %ymm2, %ymm0, %ymm3 -# CHECK-NEXT: [0,17] . . D=================eeeeeER . . . . vaddps %ymm2, %ymm0, %ymm4 -# CHECK-NEXT: [0,18] . . D=================eeeeeER . . . . vaddps %ymm2, %ymm0, %ymm5 -# CHECK-NEXT: [0,19] . . D==================eeeeeER. . . . vaddps %ymm2, %ymm0, %ymm6 -# CHECK-NEXT: [0,20] . . D==================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm7 -# CHECK-NEXT: [0,21] . . D===================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm8 -# CHECK-NEXT: [0,22] . . .D===================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm9 -# CHECK-NEXT: [0,23] . . .D====================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm10 -# CHECK-NEXT: [0,24] . . . D====================eeeeeER. . . vaddps %ymm2, %ymm0, %ymm11 -# CHECK-NEXT: [0,25] . . . D=====================eeeeeER . . vaddps %ymm2, %ymm0, %ymm12 -# CHECK-NEXT: [0,26] . . . D=====================eeeeeER . . vaddps %ymm2, %ymm0, %ymm13 -# CHECK-NEXT: [0,27] . . . D======================eeeeeER . . vaddps %ymm2, %ymm0, %ymm14 -# CHECK-NEXT: [0,28] . . . D======================eeeeeER . . vaddps %ymm2, %ymm0, %ymm15 -# CHECK-NEXT: [0,29] . . . D=======================eeeeeER. . vaddps %ymm3, %ymm0, %ymm2 -# CHECK-NEXT: [0,30] . . . 
D=======================eeeeeER . vaddps %ymm3, %ymm0, %ymm4 -# CHECK-NEXT: [0,31] . . . D========================eeeeeER. vaddps %ymm3, %ymm0, %ymm5 -# CHECK-NEXT: [0,32] . . . .D========================eeeeeER vaddps %ymm3, %ymm0, %ymm6 +# CHECK: [0,0] .DeeeeeeeeeER . . . . . . . . vdivps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] .DeeeeeE----R . . . . . . . . vaddps %ymm0, %ymm0, %ymm2 +# CHECK-NEXT: [0,2] . D=eeeeeE--R . . . . . . . . vaddps %ymm0, %ymm0, %ymm3 +# CHECK-NEXT: [0,3] . D===eeeeeER . . . . . . . . vaddps %ymm0, %ymm0, %ymm4 +# CHECK-NEXT: [0,4] . D====eeeeeER. . . . . . . . vaddps %ymm0, %ymm0, %ymm5 +# CHECK-NEXT: [0,5] . D======eeeeeER . . . . . . . vaddps %ymm0, %ymm0, %ymm6 +# CHECK-NEXT: [0,6] . D=======eeeeeER . . . . . . . vaddps %ymm0, %ymm0, %ymm7 +# CHECK-NEXT: [0,7] . D===========eeeeeER . . . . . . vaddps %ymm0, %ymm0, %ymm8 +# CHECK-NEXT: [0,8] . D============eeeeeER. . . . . . vaddps %ymm0, %ymm0, %ymm9 +# CHECK-NEXT: [0,9] . D==============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm10 +# CHECK-NEXT: [0,10] . .D==============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm11 +# CHECK-NEXT: [0,11] . .D===============eeeeeER . . . . . vaddps %ymm0, %ymm0, %ymm12 +# CHECK-NEXT: [0,12] . . D===============eeeeeER. . . . . vaddps %ymm0, %ymm0, %ymm13 +# CHECK-NEXT: [0,13] . . D=================eeeeeER . . . . vaddps %ymm0, %ymm0, %ymm14 +# CHECK-NEXT: [0,14] . . D=================eeeeeER . . . . vaddps %ymm0, %ymm0, %ymm15 +# CHECK-NEXT: [0,15] . . D=====eeeeeE------------R . . . . vaddps %ymm2, %ymm0, %ymm0 +# CHECK-NEXT: [0,16] . . D==============eeeeeE--R . . . . vaddps %ymm2, %ymm0, %ymm3 +# CHECK-NEXT: [0,17] . . D=================eeeeeER . . . . vaddps %ymm2, %ymm0, %ymm4 +# CHECK-NEXT: [0,18] . . D=================eeeeeER. . . . vaddps %ymm2, %ymm0, %ymm5 +# CHECK-NEXT: [0,19] . . D==================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm6 +# CHECK-NEXT: [0,20] . . .D==================eeeeeER . . . 
vaddps %ymm2, %ymm0, %ymm7 +# CHECK-NEXT: [0,21] . . .D===================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm8 +# CHECK-NEXT: [0,22] . . . D===================eeeeeER . . . vaddps %ymm2, %ymm0, %ymm9 +# CHECK-NEXT: [0,23] . . . D====================eeeeeER. . . vaddps %ymm2, %ymm0, %ymm10 +# CHECK-NEXT: [0,24] . . . D====================eeeeeER . . vaddps %ymm2, %ymm0, %ymm11 +# CHECK-NEXT: [0,25] . . . D=====================eeeeeER . . vaddps %ymm2, %ymm0, %ymm12 +# CHECK-NEXT: [0,26] . . . D=====================eeeeeER . . vaddps %ymm2, %ymm0, %ymm13 +# CHECK-NEXT: [0,27] . . . D======================eeeeeER . . vaddps %ymm2, %ymm0, %ymm14 +# CHECK-NEXT: [0,28] . . . D======================eeeeeER. . vaddps %ymm2, %ymm0, %ymm15 +# CHECK-NEXT: [0,29] . . . D=======================eeeeeER . vaddps %ymm3, %ymm0, %ymm2 +# CHECK-NEXT: [0,30] . . . .D=======================eeeeeER . vaddps %ymm3, %ymm0, %ymm4 +# CHECK-NEXT: [0,31] . . . .D========================eeeeeER. vaddps %ymm3, %ymm0, %ymm5 +# CHECK-NEXT: [0,32] . . . . 
D========================eeeeeER vaddps %ymm3, %ymm0, %ymm6 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/scheduler-queue-usage.s @@ -6,12 +6,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 13 +# CHECK-NEXT: Total Cycles: 14 # CHECK-NEXT: Total uOps: 2 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.15 -# CHECK-NEXT: IPC: 0.15 +# CHECK-NEXT: uOps Per Cycle: 0.14 +# CHECK-NEXT: IPC: 0.14 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: @@ -28,8 +28,8 @@ # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 12 (92.3%) -# CHECK-NEXT: 2, 1 (7.7%) +# CHECK-NEXT: 0, 13 (92.9%) +# CHECK-NEXT: 2, 1 (7.1%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/simple-test.s b/llvm/test/tools/llvm-mca/X86/BdVer2/simple-test.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/simple-test.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/simple-test.s @@ -5,12 +5,12 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 100 -# CHECK-NEXT: Total Cycles: 103 +# CHECK-NEXT: Total Cycles: 104 # CHECK-NEXT: Total uOps: 100 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.97 -# CHECK-NEXT: IPC: 0.97 +# CHECK-NEXT: uOps Per Cycle: 0.96 +# CHECK-NEXT: IPC: 0.96 # CHECK-NEXT: Block RThroughput: 1.0 # CHECK: Instruction Info: diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/store-throughput.s @@ -54,7 +54,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 403 +# CHECK-NEXT: Total Cycles: 404 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -81,19 +81,19 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) +# CHECK-NEXT: SQ - Store queue full: 371 (91.8%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 24 (6.0%) -# CHECK-NEXT: 1, 372 (92.3%) +# CHECK-NEXT: 0, 25 (6.2%) +# CHECK-NEXT: 1, 372 (92.1%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (0.7%) -# CHECK-NEXT: 1, 400 (99.3%) +# CHECK-NEXT: 0, 4 (1.0%) +# CHECK-NEXT: 1, 400 (99.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. 
@@ -144,12 +144,12 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movb %dil, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456 +# CHECK-NEXT: Index 01234567 -# CHECK: [0,0] DeER .. movb %spl, (%rax) -# CHECK-NEXT: [0,1] D=eER.. movb %bpl, (%rcx) -# CHECK-NEXT: [0,2] D==eER. movb %sil, (%rdx) -# CHECK-NEXT: [0,3] D===eER movb %dil, (%rbx) +# CHECK: [0,0] .DeER. . movb %spl, (%rax) +# CHECK-NEXT: [0,1] .D=eER . movb %bpl, (%rcx) +# CHECK-NEXT: [0,2] .D==eER. movb %sil, (%rdx) +# CHECK-NEXT: [0,3] .D===eER movb %dil, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -168,7 +168,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 403 +# CHECK-NEXT: Total Cycles: 404 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -195,19 +195,19 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) +# CHECK-NEXT: SQ - Store queue full: 371 (91.8%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 24 (6.0%) -# CHECK-NEXT: 1, 372 (92.3%) +# CHECK-NEXT: 0, 25 (6.2%) +# CHECK-NEXT: 1, 372 (92.1%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (0.7%) -# CHECK-NEXT: 1, 400 (99.3%) +# CHECK-NEXT: 0, 4 (1.0%) +# CHECK-NEXT: 1, 400 (99.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -258,12 +258,12 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movw %di, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456 +# CHECK-NEXT: Index 01234567 -# CHECK: [0,0] DeER .. movw %sp, (%rax) -# CHECK-NEXT: [0,1] D=eER.. 
movw %bp, (%rcx) -# CHECK-NEXT: [0,2] D==eER. movw %si, (%rdx) -# CHECK-NEXT: [0,3] D===eER movw %di, (%rbx) +# CHECK: [0,0] .DeER. . movw %sp, (%rax) +# CHECK-NEXT: [0,1] .D=eER . movw %bp, (%rcx) +# CHECK-NEXT: [0,2] .D==eER. movw %si, (%rdx) +# CHECK-NEXT: [0,3] .D===eER movw %di, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -282,7 +282,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 403 +# CHECK-NEXT: Total Cycles: 404 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -309,19 +309,19 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) +# CHECK-NEXT: SQ - Store queue full: 371 (91.8%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 24 (6.0%) -# CHECK-NEXT: 1, 372 (92.3%) +# CHECK-NEXT: 0, 25 (6.2%) +# CHECK-NEXT: 1, 372 (92.1%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (0.7%) -# CHECK-NEXT: 1, 400 (99.3%) +# CHECK-NEXT: 0, 4 (1.0%) +# CHECK-NEXT: 1, 400 (99.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -372,12 +372,12 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movl %edi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456 +# CHECK-NEXT: Index 01234567 -# CHECK: [0,0] DeER .. movl %esp, (%rax) -# CHECK-NEXT: [0,1] D=eER.. movl %ebp, (%rcx) -# CHECK-NEXT: [0,2] D==eER. movl %esi, (%rdx) -# CHECK-NEXT: [0,3] D===eER movl %edi, (%rbx) +# CHECK: [0,0] .DeER. . movl %esp, (%rax) +# CHECK-NEXT: [0,1] .D=eER . movl %ebp, (%rcx) +# CHECK-NEXT: [0,2] .D==eER. 
movl %esi, (%rdx) +# CHECK-NEXT: [0,3] .D===eER movl %edi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -396,7 +396,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 403 +# CHECK-NEXT: Total Cycles: 404 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -423,19 +423,19 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 371 (92.1%) +# CHECK-NEXT: SQ - Store queue full: 371 (91.8%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 24 (6.0%) -# CHECK-NEXT: 1, 372 (92.3%) +# CHECK-NEXT: 0, 25 (6.2%) +# CHECK-NEXT: 1, 372 (92.1%) # CHECK-NEXT: 4, 7 (1.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 3 (0.7%) -# CHECK-NEXT: 1, 400 (99.3%) +# CHECK-NEXT: 0, 4 (1.0%) +# CHECK-NEXT: 1, 400 (99.0%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -486,12 +486,12 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - - - - - - - - - - 1.00 movq %rdi, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 0123456 +# CHECK-NEXT: Index 01234567 -# CHECK: [0,0] DeER .. movq %rsp, (%rax) -# CHECK-NEXT: [0,1] D=eER.. movq %rbp, (%rcx) -# CHECK-NEXT: [0,2] D==eER. movq %rsi, (%rdx) -# CHECK-NEXT: [0,3] D===eER movq %rdi, (%rbx) +# CHECK: [0,0] .DeER. . movq %rsp, (%rax) +# CHECK-NEXT: [0,1] .D=eER . movq %rbp, (%rcx) +# CHECK-NEXT: [0,2] .D==eER. 
movq %rsi, (%rdx) +# CHECK-NEXT: [0,3] .D===eER movq %rdi, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -510,7 +510,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 803 +# CHECK-NEXT: Total Cycles: 804 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -537,19 +537,19 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 748 (93.2%) +# CHECK-NEXT: SQ - Store queue full: 748 (93.0%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 422 (52.6%) -# CHECK-NEXT: 1, 374 (46.6%) +# CHECK-NEXT: 0, 423 (52.6%) +# CHECK-NEXT: 1, 374 (46.5%) # CHECK-NEXT: 2, 1 (0.1%) # CHECK-NEXT: 4, 6 (0.7%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 403 (50.2%) +# CHECK-NEXT: 0, 404 (50.2%) # CHECK-NEXT: 1, 400 (49.8%) # CHECK: Scheduler's queue usage: @@ -601,13 +601,13 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movd %mm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: 0 +# CHECK-NEXT: 01 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . movd %mm0, (%rax) -# CHECK-NEXT: [0,1] D==eeER . movd %mm1, (%rcx) -# CHECK-NEXT: [0,2] D====eeER . movd %mm2, (%rdx) -# CHECK-NEXT: [0,3] D======eeER movd %mm3, (%rbx) +# CHECK: [0,0] .DeeER .. movd %mm0, (%rax) +# CHECK-NEXT: [0,1] .D==eeER .. movd %mm1, (%rcx) +# CHECK-NEXT: [0,2] .D====eeER.. 
movd %mm2, (%rdx) +# CHECK-NEXT: [0,3] .D======eeER movd %mm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -626,7 +626,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 602 +# CHECK-NEXT: Total Cycles: 603 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 @@ -653,20 +653,20 @@ # CHECK-NEXT: RCU - Retire tokens unavailable: 0 # CHECK-NEXT: SCHEDQ - Scheduler full: 0 # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 559 (92.9%) +# CHECK-NEXT: SQ - Store queue full: 559 (92.7%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 222 (36.9%) -# CHECK-NEXT: 1, 373 (62.0%) +# CHECK-NEXT: 0, 223 (37.0%) +# CHECK-NEXT: 1, 373 (61.9%) # CHECK-NEXT: 3, 1 (0.2%) # CHECK-NEXT: 4, 6 (1.0%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 202 (33.6%) -# CHECK-NEXT: 1, 400 (66.4%) +# CHECK-NEXT: 0, 203 (33.7%) +# CHECK-NEXT: 1, 400 (66.3%) # CHECK: Scheduler's queue usage: # CHECK-NEXT: [1] Resource name. @@ -717,12 +717,12 @@ # CHECK-NEXT: 1.00 - - - - - - - - - - - - 1.00 - - 3.00 - - - - - 1.00 movaps %xmm3, (%rbx) # CHECK: Timeline view: -# CHECK-NEXT: Index 01234567 +# CHECK-NEXT: Index 012345678 -# CHECK: [0,0] DeER . . movaps %xmm0, (%rax) -# CHECK-NEXT: [0,1] D=eER. . movaps %xmm1, (%rcx) -# CHECK-NEXT: [0,2] D===eER. movaps %xmm2, (%rdx) -# CHECK-NEXT: [0,3] D====eER movaps %xmm3, (%rbx) +# CHECK: [0,0] .DeER. . movaps %xmm0, (%rax) +# CHECK-NEXT: [0,1] .D=eER . movaps %xmm1, (%rcx) +# CHECK-NEXT: [0,2] .D===eER. 
movaps %xmm2, (%rdx) +# CHECK-NEXT: [0,3] .D====eER movaps %xmm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -741,7 +741,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 400 -# CHECK-NEXT: Total Cycles: 7170 +# CHECK-NEXT: Total Cycles: 7172 # CHECK-NEXT: Total uOps: 1600 # CHECK: Dispatch Width: 4 @@ -766,19 +766,19 @@ # CHECK: Dynamic Dispatch Stall Cycles: # CHECK-NEXT: RAT - Register unavailable: 0 # CHECK-NEXT: RCU - Retire tokens unavailable: 0 -# CHECK-NEXT: SCHEDQ - Scheduler full: 5777 (80.6%) +# CHECK-NEXT: SCHEDQ - Scheduler full: 5568 (77.6%) # CHECK-NEXT: LQ - Load queue full: 0 -# CHECK-NEXT: SQ - Store queue full: 561 (7.8%) +# CHECK-NEXT: SQ - Store queue full: 372 (5.2%) # CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0 # CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched: # CHECK-NEXT: [# dispatched], [# cycles] -# CHECK-NEXT: 0, 6770 (94.4%) +# CHECK-NEXT: 0, 6772 (94.4%) # CHECK-NEXT: 4, 400 (5.6%) # CHECK: Schedulers - number of cycles where we saw N micro opcodes issued: # CHECK-NEXT: [# issued], [# cycles] -# CHECK-NEXT: 0, 6770 (94.4%) +# CHECK-NEXT: 0, 6772 (94.4%) # CHECK-NEXT: 4, 400 (5.6%) # CHECK: Scheduler's queue usage: @@ -831,12 +831,12 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 01 +# CHECK-NEXT: Index 0123456789 0123456789 0123 -# CHECK: [0,0] DeER . . . . . . . .. vmovaps %ymm0, (%rax) -# CHECK-NEXT: [0,1] .D=eER . . . . . . .. vmovaps %ymm1, (%rcx) -# CHECK-NEXT: [0,2] . D==================================eER.. vmovaps %ymm2, (%rdx) -# CHECK-NEXT: [0,3] . D===================================eER vmovaps %ymm3, (%rbx) +# CHECK: [0,0] . DeER . . . . . . . . vmovaps %ymm0, (%rax) +# CHECK-NEXT: [0,1] . DeER . . . . . . . . vmovaps %ymm1, (%rcx) +# CHECK-NEXT: [0,2] . .D================================eER . vmovaps %ymm2, (%rdx) +# CHECK-NEXT: [0,3] . . 
D================================eER vmovaps %ymm3, (%rbx) # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -846,7 +846,7 @@ # CHECK: [0] [1] [2] [3] # CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps %ymm0, (%rax) -# CHECK-NEXT: 1. 1 2.0 1.0 0.0 vmovaps %ymm1, (%rcx) -# CHECK-NEXT: 2. 1 35.0 33.0 0.0 vmovaps %ymm2, (%rdx) -# CHECK-NEXT: 3. 1 36.0 1.0 0.0 vmovaps %ymm3, (%rbx) -# CHECK-NEXT: 1 18.5 9.0 0.0 +# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vmovaps %ymm1, (%rcx) +# CHECK-NEXT: 2. 1 33.0 33.0 0.0 vmovaps %ymm2, (%rdx) +# CHECK-NEXT: 3. 1 33.0 1.0 0.0 vmovaps %ymm3, (%rbx) +# CHECK-NEXT: 1 17.0 9.0 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vbroadcast-operand-latency.s @@ -6,11 +6,11 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 206 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 300 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.46 +# CHECK-NEXT: uOps Per Cycle: 1.45 # CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 2.0 @@ -61,15 +61,15 @@ # CHECK-NEXT: 0.50 0.50 - - - - - - 2.00 2.00 - - - - 1.00 1.00 - - - 0.50 0.50 - - vbroadcastss (%rax), %ymm0 # CHECK: Timeline view: -# CHECK-NEXT: 012 +# CHECK-NEXT: 0123 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax -# CHECK-NEXT: [0,1] DeeeeeeER . . vbroadcastss (%rax), %ymm0 -# CHECK-NEXT: [1,0] DeeE----R . . leaq 8(%rsp,%rdi,2), %rax -# CHECK-NEXT: [1,1] .DeeeeeeER. . vbroadcastss (%rax), %ymm0 -# CHECK-NEXT: [2,0] .D=eeE---R. . leaq 8(%rsp,%rdi,2), %rax -# CHECK-NEXT: [2,1] . D==eeeeeeER vbroadcastss (%rax), %ymm0 +# CHECK: [0,0] .DeeER . . leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: [0,1] .DeeeeeeER. . vbroadcastss (%rax), %ymm0 +# CHECK-NEXT: [1,0] .DeeE----R. . 
leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: [1,1] . DeeeeeeER . vbroadcastss (%rax), %ymm0 +# CHECK-NEXT: [2,0] . D=eeE---R . leaq 8(%rsp,%rdi,2), %rax +# CHECK-NEXT: [2,1] . D==eeeeeeER vbroadcastss (%rax), %ymm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-1.s @@ -6,12 +6,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Total Cycles: 11 # CHECK-NEXT: Total uOps: 2 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.20 -# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: uOps Per Cycle: 0.18 +# CHECK-NEXT: IPC: 0.18 # CHECK-NEXT: Block RThroughput: 1.5 # CHECK: Instruction Info: @@ -27,10 +27,11 @@ # CHECK-NEXT: 1 7 1.50 * vandps (%rdi), %xmm1, %xmm2 # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . vaddps %xmm0, %xmm0, %xmm1 -# CHECK-NEXT: [0,1] DeeeeeeeER vandps (%rdi), %xmm1, %xmm2 +# CHECK: [0,0] .DeeeeeER . 
vaddps %xmm0, %xmm0, %xmm1 +# CHECK-NEXT: [0,1] .DeeeeeeeER vandps (%rdi), %xmm1, %xmm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/vec-logic-read-after-ld-2.s @@ -6,12 +6,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 2 -# CHECK-NEXT: Total Cycles: 10 +# CHECK-NEXT: Total Cycles: 11 # CHECK-NEXT: Total uOps: 4 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 0.40 -# CHECK-NEXT: IPC: 0.20 +# CHECK-NEXT: uOps Per Cycle: 0.36 +# CHECK-NEXT: IPC: 0.18 # CHECK-NEXT: Block RThroughput: 2.0 # CHECK: Instruction Info: @@ -27,10 +27,11 @@ # CHECK-NEXT: 2 7 1.50 * vandps (%rdi), %ymm1, %ymm2 # CHECK: Timeline view: +# CHECK-NEXT: 0 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . vaddps %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [0,1] DeeeeeeeER vandps (%rdi), %ymm1, %ymm2 +# CHECK: [0,0] .DeeeeeER . vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] .DeeeeeeeER vandps (%rdi), %ymm1, %ymm2 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-1.s @@ -10,7 +10,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 -# CHECK-NEXT: Total Cycles: 550 +# CHECK-NEXT: Total Cycles: 551 # CHECK-NEXT: Total uOps: 1200 # CHECK: Dispatch Width: 4 @@ -74,20 +74,20 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 +# CHECK-NEXT: Index 0123456789 0123456789 0 -# CHECK: [0,0] DeeeeeER . . . . . . . 
vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,1] D=eeeeeeeeeeER . . . . . . vfrczpd %xmm1, %xmm2 -# CHECK-NEXT: [0,2] .D==========eeeeeER . . . . . vmulps %ymm2, %ymm3, %ymm4 -# CHECK-NEXT: [0,3] .D===============eeeeeER . . . . vaddps %ymm4, %ymm5, %ymm6 -# CHECK-NEXT: [0,4] . D===================eeeeeER . . . vmulps %ymm6, %ymm3, %ymm4 -# CHECK-NEXT: [0,5] . D========================eeeeeER . . vaddps %ymm4, %ymm5, %ymm0 -# CHECK-NEXT: [1,0] . D============================eeeeeER. vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [1,1] . DeeeeeeeeeeE-----------------------R. vfrczpd %xmm1, %xmm2 -# CHECK-NEXT: [1,2] . D==========eeeeeE-----------------R. vmulps %ymm2, %ymm3, %ymm4 -# CHECK-NEXT: [1,3] . D===============eeeeeE------------R. vaddps %ymm4, %ymm5, %ymm6 -# CHECK-NEXT: [1,4] . D===================eeeeeE--------R vmulps %ymm6, %ymm3, %ymm4 -# CHECK-NEXT: [1,5] . D========================eeeeeE---R vaddps %ymm4, %ymm5, %ymm0 +# CHECK: [0,0] .DeeeeeER . . . . . . . vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,1] .D=eeeeeeeeeeER. . . . . . vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: [0,2] . D==========eeeeeER. . . . . vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [0,3] . D===============eeeeeER. . . . vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [0,4] . D===================eeeeeER. . . vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [0,5] . D========================eeeeeER. . vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: [1,0] . D============================eeeeeER. vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,1] . DeeeeeeeeeeE-----------------------R. vfrczpd %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D==========eeeeeE-----------------R. vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [1,3] . D===============eeeeeE------------R. vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [1,4] . .D===================eeeeeE--------R vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [1,5] . 
.D========================eeeeeE---R vaddps %ymm4, %ymm5, %ymm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/xop-super-registers-2.s @@ -10,7 +10,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 600 -# CHECK-NEXT: Total Cycles: 614 +# CHECK-NEXT: Total Cycles: 615 # CHECK-NEXT: Total uOps: 1100 # CHECK: Dispatch Width: 4 @@ -73,21 +73,21 @@ # CHECK-NEXT: - - - - - - - - 0.42 1.58 - - - - 1.00 - - - - - - - - vaddps %ymm4, %ymm5, %ymm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 01 +# CHECK-NEXT: 0123456789 012 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeeER . . . . .. vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,1] DeeeE--R . . . . .. vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2 -# CHECK-NEXT: [0,2] .D==eeeeeER . . . .. vmulps %ymm2, %ymm3, %ymm4 -# CHECK-NEXT: [0,3] .D=======eeeeeER . . .. vaddps %ymm4, %ymm5, %ymm6 -# CHECK-NEXT: [0,4] . D===========eeeeeER . .. vmulps %ymm6, %ymm3, %ymm4 -# CHECK-NEXT: [0,5] . D================eeeeeER .. vaddps %ymm4, %ymm5, %ymm0 -# CHECK-NEXT: [1,0] . D====================eeeeeER. vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [1,1] . DeeeE----------------------R. vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2 -# CHECK-NEXT: [1,2] . D==eeeeeE-----------------R. vmulps %ymm2, %ymm3, %ymm4 -# CHECK-NEXT: [1,3] . D=======eeeeeE------------R. vaddps %ymm4, %ymm5, %ymm6 -# CHECK-NEXT: [1,4] . D===========eeeeeE--------R vmulps %ymm6, %ymm3, %ymm4 -# CHECK-NEXT: [1,5] . D================eeeeeE---R vaddps %ymm4, %ymm5, %ymm0 +# CHECK: [0,0] .DeeeeeER . . . . . . vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,1] .DeeeE--R . . . . . . vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2 +# CHECK-NEXT: [0,2] . D==eeeeeER . . . . . 
vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [0,3] . D=======eeeeeER . . . . vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [0,4] . D===========eeeeeER . . . vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [0,5] . D================eeeeeER . . vaddps %ymm4, %ymm5, %ymm0 +# CHECK-NEXT: [1,0] . D====================eeeeeER. vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,1] . DeeeE----------------------R. vpermil2pd $15, %xmm3, %xmm5, %xmm1, %xmm2 +# CHECK-NEXT: [1,2] . D==eeeeeE-----------------R. vmulps %ymm2, %ymm3, %ymm4 +# CHECK-NEXT: [1,3] . D=======eeeeeE------------R. vaddps %ymm4, %ymm5, %ymm6 +# CHECK-NEXT: [1,4] . .D===========eeeeeE--------R vmulps %ymm6, %ymm3, %ymm4 +# CHECK-NEXT: [1,5] . .D================eeeeeE---R vaddps %ymm4, %ymm5, %ymm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms-avx-256.s @@ -40,7 +40,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 300 -# CHECK-NEXT: Total Cycles: 354 +# CHECK-NEXT: Total Cycles: 355 # CHECK-NEXT: Total uOps: 600 # CHECK: Dispatch Width: 4 @@ -97,18 +97,18 @@ # CHECK-NEXT: - - - - - - - - 1.53 1.47 - - - - 0.02 1.98 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . vaddps %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [0,1] DeeE---R . . vxorps %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [0,2] .D=eeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3 -# CHECK-NEXT: [1,0] .D=eeeeeER. . vaddps %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [1,1] . D==eeE-R. . vxorps %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [1,2] . D====eeER . vblendps $2, %ymm1, %ymm2, %ymm3 -# CHECK-NEXT: [2,0] . D==eeeeeER. vaddps %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [2,1] . D====eeE-R. 
vxorps %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [2,2] . D=====eeER vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK: [0,0] .DeeeeeER . . vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] .DeeE---R . . vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,2] . D=eeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [1,0] . D=eeeeeER . vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [1,1] . D==eeE-R . vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [1,2] . D====eeER . vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [2,0] . D==eeeeeER. vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [2,1] . D====eeE-R. vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [2,2] . D=====eeER vblendps $2, %ymm1, %ymm2, %ymm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -126,7 +126,7 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 300 -# CHECK-NEXT: Total Cycles: 354 +# CHECK-NEXT: Total Cycles: 355 # CHECK-NEXT: Total uOps: 600 # CHECK: Dispatch Width: 4 @@ -183,18 +183,18 @@ # CHECK-NEXT: - - - - - - - - 1.53 1.47 - - - - 0.02 1.98 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3 # CHECK: Timeline view: -# CHECK-NEXT: 0123 +# CHECK-NEXT: 01234 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER . . vaddpd %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [0,1] DeeE---R . . vxorpd %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [0,2] .D=eeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3 -# CHECK-NEXT: [1,0] .D=eeeeeER. . vaddpd %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [1,1] . D==eeE-R. . vxorpd %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [1,2] . D====eeER . vblendpd $2, %ymm1, %ymm2, %ymm3 -# CHECK-NEXT: [2,0] . D==eeeeeER. vaddpd %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [2,1] . D====eeE-R. vxorpd %ymm1, %ymm1, %ymm1 -# CHECK-NEXT: [2,2] . D=====eeER vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK: [0,0] .DeeeeeER . . vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] .DeeE---R . . vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,2] . D=eeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [1,0] . D=eeeeeER . 
vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [1,1] . D==eeE-R . vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [1,2] . D====eeER . vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [2,0] . D==eeeeeER. vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [2,1] . D====eeE-R. vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [2,2] . D=====eeER vblendpd $2, %ymm1, %ymm2, %ymm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -212,11 +212,11 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 206 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.94 +# CHECK-NEXT: uOps Per Cycle: 1.93 # CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 2.0 @@ -267,15 +267,15 @@ # CHECK-NEXT: - - - - - - - - 2.00 - - - - - - 2.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,1] DeeE---R .. vandnps %ymm2, %ymm2, %ymm3 -# CHECK-NEXT: [1,0] .D=eeeeeER.. vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [1,1] .D=eeE---R.. vandnps %ymm2, %ymm2, %ymm3 -# CHECK-NEXT: [2,0] . D==eeeeeER vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [2,1] . D==eeE---R vandnps %ymm2, %ymm2, %ymm3 +# CHECK: [0,0] .DeeeeeER . . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,1] .DeeE---R . . vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: [1,0] . D=eeeeeER . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,1] . D=eeE---R . vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: [2,0] . D==eeeeeER vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [2,1] . 
D==eeE---R vandnps %ymm2, %ymm2, %ymm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -292,11 +292,11 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 206 +# CHECK-NEXT: Total Cycles: 207 # CHECK-NEXT: Total uOps: 400 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.94 +# CHECK-NEXT: uOps Per Cycle: 1.93 # CHECK-NEXT: IPC: 0.97 # CHECK-NEXT: Block RThroughput: 2.0 @@ -347,15 +347,15 @@ # CHECK-NEXT: - - - - - - - - 2.00 - - - - - - 2.00 - - - - - - - vandnps %ymm2, %ymm2, %ymm3 # CHECK: Timeline view: -# CHECK-NEXT: 01 +# CHECK-NEXT: 012 # CHECK-NEXT: Index 0123456789 -# CHECK: [0,0] DeeeeeER .. vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [0,1] DeeE---R .. vandnps %ymm2, %ymm2, %ymm3 -# CHECK-NEXT: [1,0] .D=eeeeeER.. vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [1,1] .D=eeE---R.. vandnps %ymm2, %ymm2, %ymm3 -# CHECK-NEXT: [2,0] . D==eeeeeER vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: [2,1] . D==eeE---R vandnps %ymm2, %ymm2, %ymm3 +# CHECK: [0,0] .DeeeeeER . . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [0,1] .DeeE---R . . vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: [1,0] . D=eeeeeER . vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [1,1] . D=eeE---R . vandnps %ymm2, %ymm2, %ymm3 +# CHECK-NEXT: [2,0] . D==eeeeeER vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: [2,1] . 
D==eeE---R vandnps %ymm2, %ymm2, %ymm3 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -372,11 +372,11 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 200 -# CHECK-NEXT: Total Cycles: 903 +# CHECK-NEXT: Total Cycles: 907 # CHECK-NEXT: Total uOps: 1000 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 1.11 +# CHECK-NEXT: uOps Per Cycle: 1.10 # CHECK-NEXT: IPC: 0.22 # CHECK-NEXT: Block RThroughput: 4.0 @@ -427,15 +427,15 @@ # CHECK-NEXT: - - - - - - - - 2.00 - - - - - 1.00 - - - - - - - - vaddps %ymm1, %ymm1, %ymm0 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 +# CHECK-NEXT: 0123456789 0123 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DeeeeER . . . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [0,1] . D==eeeeeER . . . . vaddps %ymm1, %ymm1, %ymm0 -# CHECK-NEXT: [1,0] . D======eeeeER . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [1,1] . D========eeeeeER . . vaddps %ymm1, %ymm1, %ymm0 -# CHECK-NEXT: [2,0] . .D============eeeeER. . vperm2f128 $136, %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: [2,1] . . D==============eeeeeER vaddps %ymm1, %ymm1, %ymm0 +# CHECK: [0,0] . DeeeeER . . . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] . .D==eeeeeER . . . . vaddps %ymm1, %ymm1, %ymm0 +# CHECK-NEXT: [1,0] . . D===eeeeER. . . . vperm2f128 $136, %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [1,1] . . . D=====eeeeeER. . . vaddps %ymm1, %ymm1, %ymm0 +# CHECK-NEXT: [2,0] . . . .D======eeeeER . . vperm2f128 $136, %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [2,1] . . . . D========eeeeeER vaddps %ymm1, %ymm1, %ymm0 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions @@ -444,6 +444,6 @@ # CHECK-NEXT: [3]: Average time elapsed from WB until retire stage # CHECK: [0] [1] [2] [3] -# CHECK-NEXT: 0. 3 7.0 0.3 0.0 vperm2f128 $136, %ymm0, %ymm0, %ymm1 -# CHECK-NEXT: 1. 3 9.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0 -# CHECK-NEXT: 3 8.0 0.2 0.0 +# CHECK-NEXT: 0. 
3 4.0 0.3 0.0 vperm2f128 $136, %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 1. 3 6.0 0.0 0.0 vaddps %ymm1, %ymm1, %ymm0 +# CHECK-NEXT: 3 5.0 0.2 0.0 diff --git a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s --- a/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s +++ b/llvm/test/tools/llvm-mca/X86/BdVer2/zero-idioms.s @@ -90,12 +90,12 @@ # CHECK: Iterations: 1 # CHECK-NEXT: Instructions: 71 -# CHECK-NEXT: Total Cycles: 31 +# CHECK-NEXT: Total Cycles: 32 # CHECK-NEXT: Total uOps: 71 # CHECK: Dispatch Width: 4 -# CHECK-NEXT: uOps Per Cycle: 2.29 -# CHECK-NEXT: IPC: 2.29 +# CHECK-NEXT: uOps Per Cycle: 2.22 +# CHECK-NEXT: IPC: 2.22 # CHECK-NEXT: Block RThroughput: 17.8 # CHECK: Instruction Info: @@ -297,80 +297,80 @@ # CHECK-NEXT: - - - - - - - - - - - - - - - - - - - - - - - vpxor %xmm3, %xmm3, %xmm5 # CHECK: Timeline view: -# CHECK-NEXT: 0123456789 0 +# CHECK-NEXT: 0123456789 01 # CHECK-NEXT: Index 0123456789 0123456789 -# CHECK: [0,0] DR . . . . . . subl %eax, %eax -# CHECK-NEXT: [0,1] DR . . . . . . subq %rax, %rax -# CHECK-NEXT: [0,2] DR . . . . . . xorl %eax, %eax -# CHECK-NEXT: [0,3] DR . . . . . . xorq %rax, %rax -# CHECK-NEXT: [0,4] .DR . . . . . . pcmpgtb %mm2, %mm2 -# CHECK-NEXT: [0,5] .DR . . . . . . pcmpgtd %mm2, %mm2 -# CHECK-NEXT: [0,6] .DR . . . . . . pcmpgtw %mm2, %mm2 -# CHECK-NEXT: [0,7] .DR . . . . . . pcmpgtb %xmm2, %xmm2 -# CHECK-NEXT: [0,8] . DR . . . . . . pcmpgtd %xmm2, %xmm2 -# CHECK-NEXT: [0,9] . DeeER . . . . . pcmpgtq %xmm2, %xmm2 -# CHECK-NEXT: [0,10] . D---R . . . . . pcmpgtw %xmm2, %xmm2 -# CHECK-NEXT: [0,11] . D---R . . . . . vpcmpgtb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,12] . D--R . . . . . vpcmpgtd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,13] . DeeER . . . . . vpcmpgtq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,14] . D---R . . . . . vpcmpgtw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,15] . D---R . . . . . vpcmpgtb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,16] . D--R . . . . . 
vpcmpgtd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,17] . DeeER . . . . . vpcmpgtq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,18] . D---R . . . . . vpcmpgtw %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,19] . D---R . . . . . psubb %mm2, %mm2 -# CHECK-NEXT: [0,20] . D--R . . . . . psubd %mm2, %mm2 -# CHECK-NEXT: [0,21] . D---R. . . . . psubq %mm2, %mm2 -# CHECK-NEXT: [0,22] . D---R. . . . . psubw %mm2, %mm2 -# CHECK-NEXT: [0,23] . D---R. . . . . psubb %xmm2, %xmm2 -# CHECK-NEXT: [0,24] . .D--R. . . . . psubd %xmm2, %xmm2 -# CHECK-NEXT: [0,25] . .D---R . . . . psubq %xmm2, %xmm2 -# CHECK-NEXT: [0,26] . .D---R . . . . psubw %xmm2, %xmm2 -# CHECK-NEXT: [0,27] . .D---R . . . . vpsubb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,28] . . D--R . . . . vpsubd %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,29] . . D---R . . . . vpsubq %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,30] . . D---R . . . . vpsubw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,31] . . D---R . . . . vpsubb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,32] . . D--R . . . . vpsubd %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,33] . . D---R . . . . vpsubq %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,34] . . D---R . . . . vpsubw %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,35] . . DeeER . . . . psubsb %mm2, %mm2 -# CHECK-NEXT: [0,36] . . DeeER . . . . psubsw %mm2, %mm2 -# CHECK-NEXT: [0,37] . . DeeER . . . . psubsb %xmm2, %xmm2 -# CHECK-NEXT: [0,38] . . D=eeER. . . . psubsw %xmm2, %xmm2 -# CHECK-NEXT: [0,39] . . D==eeER . . . vpsubsb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,40] . . D==eeER . . . vpsubsw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,41] . . D===eeER . . . vpsubsb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,42] . . D====eeER . . . vpsubsw %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,43] . . D=====eeER. . . psubusb %mm2, %mm2 -# CHECK-NEXT: [0,44] . . .D=====eeER . . psubusw %mm2, %mm2 -# CHECK-NEXT: [0,45] . . .D=====eeER . . psubusb %xmm2, %xmm2 -# CHECK-NEXT: [0,46] . . .D======eeER . . psubusw %xmm2, %xmm2 -# CHECK-NEXT: [0,47] . . .D=======eeER . . 
vpsubusb %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,48] . . . D=======eeER . . vpsubusw %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,49] . . . D========eeER. . vpsubsb %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,50] . . . D=========eeER . vpsubsw %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,51] . . . D------------R . andnps %xmm0, %xmm0 -# CHECK-NEXT: [0,52] . . . D-----------R . andnpd %xmm1, %xmm1 -# CHECK-NEXT: [0,53] . . . D-----------R . vandnps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: [0,54] . . . D------------R . vandnpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: [0,55] . . . D------------R . pandn %mm2, %mm2 -# CHECK-NEXT: [0,56] . . . D-----------R . pandn %xmm2, %xmm2 -# CHECK-NEXT: [0,57] . . . D-----------R . vpandn %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,58] . . . D------------R . vandnps %xmm2, %xmm2, %xmm5 -# CHECK-NEXT: [0,59] . . . D------------R . vandnpd %xmm1, %xmm1, %xmm5 -# CHECK-NEXT: [0,60] . . . D-----------R . vpandn %xmm3, %xmm3, %xmm5 -# CHECK-NEXT: [0,61] . . . D-----------R . xorps %xmm0, %xmm0 -# CHECK-NEXT: [0,62] . . . D------------R . xorpd %xmm1, %xmm1 -# CHECK-NEXT: [0,63] . . . D------------R . vxorps %xmm2, %xmm2, %xmm2 -# CHECK-NEXT: [0,64] . . . .D-----------R . vxorpd %xmm1, %xmm1, %xmm1 -# CHECK-NEXT: [0,65] . . . .D-----------R . pxor %mm2, %mm2 -# CHECK-NEXT: [0,66] . . . .D------------R. pxor %xmm2, %xmm2 -# CHECK-NEXT: [0,67] . . . .D------------R. vpxor %xmm3, %xmm3, %xmm3 -# CHECK-NEXT: [0,68] . . . . D-----------R. vxorps %xmm4, %xmm4, %xmm5 -# CHECK-NEXT: [0,69] . . . . D-----------R. vxorpd %xmm1, %xmm1, %xmm3 -# CHECK-NEXT: [0,70] . . . . D------------R vpxor %xmm3, %xmm3, %xmm5 +# CHECK: [0,0] .DR . . . . . .. subl %eax, %eax +# CHECK-NEXT: [0,1] .DR . . . . . .. subq %rax, %rax +# CHECK-NEXT: [0,2] .DR . . . . . .. xorl %eax, %eax +# CHECK-NEXT: [0,3] .DR . . . . . .. xorq %rax, %rax +# CHECK-NEXT: [0,4] . DR . . . . . .. pcmpgtb %mm2, %mm2 +# CHECK-NEXT: [0,5] . DR . . . . . .. pcmpgtd %mm2, %mm2 +# CHECK-NEXT: [0,6] . DR . . . . . .. 
pcmpgtw %mm2, %mm2 +# CHECK-NEXT: [0,7] . DR . . . . . .. pcmpgtb %xmm2, %xmm2 +# CHECK-NEXT: [0,8] . DR. . . . . .. pcmpgtd %xmm2, %xmm2 +# CHECK-NEXT: [0,9] . DeeER . . . . .. pcmpgtq %xmm2, %xmm2 +# CHECK-NEXT: [0,10] . D---R . . . . .. pcmpgtw %xmm2, %xmm2 +# CHECK-NEXT: [0,11] . D---R . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,12] . D--R . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,13] . DeeER . . . . .. vpcmpgtq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,14] . D---R . . . . .. vpcmpgtw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,15] . D---R . . . . .. vpcmpgtb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,16] . D--R . . . . .. vpcmpgtd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,17] . DeeER. . . . .. vpcmpgtq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,18] . D---R. . . . .. vpcmpgtw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,19] . D---R. . . . .. psubb %mm2, %mm2 +# CHECK-NEXT: [0,20] . .D--R. . . . .. psubd %mm2, %mm2 +# CHECK-NEXT: [0,21] . .D---R . . . .. psubq %mm2, %mm2 +# CHECK-NEXT: [0,22] . .D---R . . . .. psubw %mm2, %mm2 +# CHECK-NEXT: [0,23] . .D---R . . . .. psubb %xmm2, %xmm2 +# CHECK-NEXT: [0,24] . . D--R . . . .. psubd %xmm2, %xmm2 +# CHECK-NEXT: [0,25] . . D---R . . . .. psubq %xmm2, %xmm2 +# CHECK-NEXT: [0,26] . . D---R . . . .. psubw %xmm2, %xmm2 +# CHECK-NEXT: [0,27] . . D---R . . . .. vpsubb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,28] . . D--R . . . .. vpsubd %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,29] . . D---R . . . .. vpsubq %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,30] . . D---R . . . .. vpsubw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,31] . . D---R . . . .. vpsubb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,32] . . D--R . . . .. vpsubd %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,33] . . D---R . . . .. vpsubq %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,34] . . D---R . . . .. vpsubw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,35] . . DeeER . . . .. psubsb %mm2, %mm2 +# CHECK-NEXT: [0,36] . . DeeER. . . .. psubsw %mm2, %mm2 +# CHECK-NEXT: [0,37] . . DeeER. . . .. 
psubsb %xmm2, %xmm2 +# CHECK-NEXT: [0,38] . . D=eeER . . .. psubsw %xmm2, %xmm2 +# CHECK-NEXT: [0,39] . . D==eeER . . .. vpsubsb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,40] . . .D==eeER . . .. vpsubsw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,41] . . .D===eeER . . .. vpsubsb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,42] . . .D====eeER. . .. vpsubsw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,43] . . .D=====eeER . .. psubusb %mm2, %mm2 +# CHECK-NEXT: [0,44] . . . D=====eeER . .. psubusw %mm2, %mm2 +# CHECK-NEXT: [0,45] . . . D=====eeER . .. psubusb %xmm2, %xmm2 +# CHECK-NEXT: [0,46] . . . D======eeER . .. psubusw %xmm2, %xmm2 +# CHECK-NEXT: [0,47] . . . D=======eeER . .. vpsubusb %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,48] . . . D=======eeER. .. vpsubusw %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,49] . . . D========eeER .. vpsubsb %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,50] . . . D=========eeER .. vpsubsw %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,51] . . . D------------R .. andnps %xmm0, %xmm0 +# CHECK-NEXT: [0,52] . . . D-----------R .. andnpd %xmm1, %xmm1 +# CHECK-NEXT: [0,53] . . . D-----------R .. vandnps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,54] . . . D------------R .. vandnpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,55] . . . D------------R .. pandn %mm2, %mm2 +# CHECK-NEXT: [0,56] . . . D-----------R .. pandn %xmm2, %xmm2 +# CHECK-NEXT: [0,57] . . . D-----------R .. vpandn %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,58] . . . D------------R .. vandnps %xmm2, %xmm2, %xmm5 +# CHECK-NEXT: [0,59] . . . D------------R .. vandnpd %xmm1, %xmm1, %xmm5 +# CHECK-NEXT: [0,60] . . . .D-----------R .. vpandn %xmm3, %xmm3, %xmm5 +# CHECK-NEXT: [0,61] . . . .D-----------R .. xorps %xmm0, %xmm0 +# CHECK-NEXT: [0,62] . . . .D------------R.. xorpd %xmm1, %xmm1 +# CHECK-NEXT: [0,63] . . . .D------------R.. vxorps %xmm2, %xmm2, %xmm2 +# CHECK-NEXT: [0,64] . . . . D-----------R.. vxorpd %xmm1, %xmm1, %xmm1 +# CHECK-NEXT: [0,65] . . . . D-----------R.. pxor %mm2, %mm2 +# CHECK-NEXT: [0,66] . . . . D------------R. 
pxor %xmm2, %xmm2 +# CHECK-NEXT: [0,67] . . . . D------------R. vpxor %xmm3, %xmm3, %xmm3 +# CHECK-NEXT: [0,68] . . . . D-----------R. vxorps %xmm4, %xmm4, %xmm5 +# CHECK-NEXT: [0,69] . . . . D-----------R. vxorpd %xmm1, %xmm1, %xmm3 +# CHECK-NEXT: [0,70] . . . . D------------R vpxor %xmm3, %xmm3, %xmm5 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s --- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -13,7 +13,7 @@ # ALL: Iterations: 1 # ALL-NEXT: Instructions: 2 -# BDVER2-NEXT: Total Cycles: 9 +# BDVER2-NEXT: Total Cycles: 10 # BDVER2-NEXT: Total uOps: 3 # BDWELL-NEXT: Total Cycles: 10 @@ -35,8 +35,8 @@ # ZNVER2-NEXT: Total uOps: 3 # BDVER2: Dispatch Width: 4 -# BDVER2-NEXT: uOps Per Cycle: 0.33 -# BDVER2-NEXT: IPC: 0.22 +# BDVER2-NEXT: uOps Per Cycle: 0.30 +# BDVER2-NEXT: IPC: 0.20 # BDVER2-NEXT: Block RThroughput: 2.0 # BDWELL: Dispatch Width: 4 @@ -102,7 +102,7 @@ # ALL: Timeline view: -# BDVER2-NEXT: Index 012345678 +# BDVER2-NEXT: Index 0123456789 # BDWELL-NEXT: Index 0123456789 # BTVER2-NEXT: Index 0123456 # HASWELL-NEXT: Index 0123456789 @@ -110,8 +110,8 @@ # ZNVER1-NEXT: Index 01234567 # ZNVER2-NEXT: Index 01234567 -# BDVER2: [0,0] DeER . . addl %edi, %esi -# BDVER2-NEXT: [0,1] DeeeeeeER bextrl %esi, (%rdi), %eax +# BDVER2: [0,0] .DeER. . addl %edi, %esi +# BDVER2-NEXT: [0,1] .DeeeeeeER bextrl %esi, (%rdi), %eax # BDWELL: [0,0] DeER . . 
addl %edi, %esi # BDWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s --- a/llvm/test/tools/llvm-mca/X86/cpus.s +++ b/llvm/test/tools/llvm-mca/X86/cpus.s @@ -17,7 +17,21 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 100 -# ALL-NEXT: Total Cycles: 103 + +# BARCELONA-NEXT: Total Cycles: 103 +# BDVER2-NEXT: Total Cycles: 104 +# BROADWELL-NEXT: Total Cycles: 103 +# BTVER2-NEXT: Total Cycles: 103 +# HASWELL-NEXT: Total Cycles: 103 +# IVYBRIDGE-NEXT: Total Cycles: 103 +# KNL-NEXT: Total Cycles: 103 +# SANDYBRIDGE-NEXT: Total Cycles: 103 +# SKX-NEXT: Total Cycles: 103 +# SKX-AVX512-NEXT: Total Cycles: 103 +# SLM-NEXT: Total Cycles: 103 +# ZNVER1-NEXT: Total Cycles: 103 +# ZNVER2-NEXT: Total Cycles: 103 + # ALL-NEXT: Total uOps: 100 # BARCELONA: Dispatch Width: 4 @@ -26,8 +40,8 @@ # BARCELONA-NEXT: Block RThroughput: 0.3 # BDVER2: Dispatch Width: 4 -# BDVER2-NEXT: uOps Per Cycle: 0.97 -# BDVER2-NEXT: IPC: 0.97 +# BDVER2-NEXT: uOps Per Cycle: 0.96 +# BDVER2-NEXT: IPC: 0.96 # BDVER2-NEXT: Block RThroughput: 1.0 # BROADWELL: Dispatch Width: 4 diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -18,7 +18,7 @@ # BARCELONA-NEXT: Total Cycles: 20 # BARCELONA-NEXT: Total uOps: 3 -# BDVER2-NEXT: Total Cycles: 17 +# BDVER2-NEXT: Total Cycles: 18 # BDVER2-NEXT: Total uOps: 2 # BDWELL-NEXT: Total Cycles: 17 @@ -48,8 +48,8 @@ # BARCELONA-NEXT: Block RThroughput: 14.0 # BDVER2: Dispatch Width: 4 -# BDVER2-NEXT: uOps Per Cycle: 0.12 -# BDVER2-NEXT: IPC: 0.12 +# BDVER2-NEXT: uOps Per Cycle: 0.11 +# BDVER2-NEXT: IPC: 0.11 # BDVER2-NEXT: Block RThroughput: 5.0 # BDWELL: Dispatch Width: 4 @@ -92,7 +92,7 @@ # BARCELONA-NEXT: 0123456789 # BARCELONA-NEXT: Index 0123456789 -# BDVER2-NEXT: 0123456 +# BDVER2-NEXT: 01234567 # BDVER2-NEXT: 
Index 0123456789 # BDWELL-NEXT: 0123456 @@ -119,8 +119,8 @@ # BARCELONA: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # BARCELONA-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# BDVER2: [0,0] DeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 -# BDVER2-NEXT: [0,1] D====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# BDVER2: [0,0] .DeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 +# BDVER2-NEXT: [0,1] .D====eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 # BDWELL: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 # BDWELL-NEXT: [0,1] D======eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -17,8 +17,45 @@ # ALL: Schedulers - number of cycles where we saw N micro opcodes issued: # ALL-NEXT: [# issued], [# cycles] -# ALL-NEXT: 0, 3 (75.0%) -# ALL-NEXT: 1, 1 (25.0%) + +# BARCELONA-NEXT: 0, 3 (75.0%) +# BARCELONA-NEXT: 1, 1 (25.0%) + +# BDVER2-NEXT: 0, 4 (80.0%) +# BDVER2-NEXT: 1, 1 (20.0%) + +# BDW-NEXT: 0, 3 (75.0%) +# BDW-NEXT: 1, 1 (25.0%) + +# BTVER2-NEXT: 0, 3 (75.0%) +# BTVER2-NEXT: 1, 1 (25.0%) + +# HSW-NEXT: 0, 3 (75.0%) +# HSW-NEXT: 1, 1 (25.0%) + +# IVB-NEXT: 0, 3 (75.0%) +# IVB-NEXT: 1, 1 (25.0%) + +# KNL-NEXT: 0, 3 (75.0%) +# KNL-NEXT: 1, 1 (25.0%) + +# SKX-NEXT: 0, 3 (75.0%) +# SKX-NEXT: 1, 1 (25.0%) + +# SKX-AVX512-NEXT: 0, 3 (75.0%) +# SKX-AVX512-NEXT: 1, 1 (25.0%) + +# SLM-NEXT: 0, 3 (75.0%) +# SLM-NEXT: 1, 1 (25.0%) + +# SNB-NEXT: 0, 3 (75.0%) +# SNB-NEXT: 1, 1 (25.0%) + +# ZNVER1-NEXT: 0, 3 (75.0%) +# ZNVER1-NEXT: 1, 1 (25.0%) + +# ZNVER2-NEXT: 0, 3 (75.0%) +# ZNVER2-NEXT: 1, 1 (25.0%) # BARCELONA: Scheduler's queue usage: # BARCELONA-NEXT: [1] Resource name. 
diff --git a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s --- a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s +++ b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s @@ -35,7 +35,7 @@ # BARCELONA-NEXT: 0123456789 # BARCELONA-NEXT: Index 0123456789 0123 -# BDVER2-NEXT: 012345678 +# BDVER2-NEXT: 0123456789 # BDVER2-NEXT: Index 0123456789 # BROADWELL-NEXT: 0123456789 @@ -59,8 +59,8 @@ # BARCELONA: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 -# BDVER2: [0,0] DeeER. . . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 +# BDVER2: [0,0] .DeeER . . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] .D==eeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 @@ -120,7 +120,7 @@ # BARCELONA-NEXT: 0123456789 0 # BARCELONA-NEXT: Index 0123456789 0123456789 -# BDVER2-NEXT: 012345678 +# BDVER2-NEXT: 0123456789 # BDVER2-NEXT: Index 0123456789 # BROADWELL-NEXT: 0123456789 @@ -144,8 +144,8 @@ # BARCELONA: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 -# BDVER2: [0,0] DeeER. . . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 +# BDVER2: [0,0] .DeeER . . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] .D==eeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 @@ -203,7 +203,7 @@ # ALL: Timeline view: # BARCELONA-NEXT: 01234 -# BDVER2-NEXT: 01234 +# BDVER2-NEXT: 012345 # BROADWELL-NEXT: 0123 # BTVER2-NEXT: 01 # HASWELL-NEXT: 0123 @@ -216,8 +216,8 @@ # BARCELONA: [0,0] DeER . . . 
leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeER rsqrtss (%rax), %xmm1 -# BDVER2: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D==eeeeeeeeeeER rsqrtss (%rax), %xmm1 +# BDVER2: [0,0] .DeeER . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] .D==eeeeeeeeeeER rsqrtss (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeER rsqrtss (%rax), %xmm1 @@ -275,7 +275,7 @@ # ALL: Timeline view: # BARCELONA-NEXT: 01234 -# BDVER2-NEXT: 01234 +# BDVER2-NEXT: 012345 # BROADWELL-NEXT: 0123 # BTVER2-NEXT: 01 # HASWELL-NEXT: 0123 @@ -288,8 +288,8 @@ # BARCELONA: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BARCELONA-NEXT: [0,1] D=eeeeeeeeeeeER rcpss (%rax), %xmm1 -# BDVER2: [0,0] DeeER. . . leaq 8(%rsp,%rdi,2), %rax -# BDVER2-NEXT: [0,1] D==eeeeeeeeeeER rcpss (%rax), %xmm1 +# BDVER2: [0,0] .DeeER . . leaq 8(%rsp,%rdi,2), %rax +# BDVER2-NEXT: [0,1] .D==eeeeeeeeeeER rcpss (%rax), %xmm1 # BROADWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeER rcpss (%rax), %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -23,7 +23,7 @@ # ALL: Iterations: 1 # ALL-NEXT: Instructions: 2 -# BDVER2-NEXT: Total Cycles: 10 +# BDVER2-NEXT: Total Cycles: 11 # BDVER2-NEXT: Total uOps: 2 # BDWELL-NEXT: Total Cycles: 10 @@ -51,8 +51,8 @@ # ZNVER2-NEXT: Total uOps: 2 # BDVER2: Dispatch Width: 4 -# BDVER2-NEXT: uOps Per Cycle: 0.20 -# BDVER2-NEXT: IPC: 0.20 +# BDVER2-NEXT: uOps Per Cycle: 0.18 +# BDVER2-NEXT: IPC: 0.18 # BDVER2-NEXT: Block RThroughput: 2.0 # BDWELL: Dispatch Width: 4 @@ -96,6 +96,7 @@ # ZNVER2-NEXT: Block RThroughput: 1.0 # BDVER2: Timeline view: +# BDVER2-NEXT: 0 # BDVER2-NEXT: Index 0123456789 # BDWELL: Timeline view: @@ -129,8 +130,8 @@ # 
ZNVER2-NEXT: 0 # ZNVER2-NEXT: Index 0123456789 -# BDVER2: [0,0] DeeeeeER . vaddps %xmm0, %xmm0, %xmm1 -# BDVER2-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDVER2: [0,0] .DeeeeeER . vaddps %xmm0, %xmm0, %xmm1 +# BDVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 # BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -23,7 +23,7 @@ # ALL: Iterations: 1 # ALL-NEXT: Instructions: 2 -# BDVER2-NEXT: Total Cycles: 10 +# BDVER2-NEXT: Total Cycles: 11 # BDVER2-NEXT: Total uOps: 2 # BDWELL-NEXT: Total Cycles: 10 @@ -51,8 +51,8 @@ # ZNVER2-NEXT: Total uOps: 2 # BDVER2: Dispatch Width: 4 -# BDVER2-NEXT: uOps Per Cycle: 0.20 -# BDVER2-NEXT: IPC: 0.20 +# BDVER2-NEXT: uOps Per Cycle: 0.18 +# BDVER2-NEXT: IPC: 0.18 # BDVER2-NEXT: Block RThroughput: 2.0 # BDWELL: Dispatch Width: 4 @@ -96,6 +96,7 @@ # ZNVER2-NEXT: Block RThroughput: 1.0 # BDVER2: Timeline view: +# BDVER2-NEXT: 0 # BDVER2-NEXT: Index 0123456789 # BDWELL: Timeline view: @@ -129,8 +130,8 @@ # ZNVER2-NEXT: 0 # ZNVER2-NEXT: Index 0123456789 -# BDVER2: [0,0] DeeeeeER . vaddps %xmm0, %xmm0, %xmm2 -# BDVER2-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# BDVER2: [0,0] .DeeeeeER . vaddps %xmm0, %xmm0, %xmm2 +# BDVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # BDWELL: [0,0] DeeeER . 
vaddps %xmm0, %xmm0, %xmm2 # BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -32,6 +32,7 @@ #include "Views/SchedulerStatistics.h" #include "Views/SummaryView.h" #include "Views/TimelineView.h" +#include "llvm/ADT/Sequence.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" @@ -464,9 +465,9 @@ ArrayRef Insts = Region->getInstructions(); mca::CodeEmitter CE(*STI, *MAB, *MCE, Insts); std::vector> LoweredSequence; - for (const MCInst &MCI : Insts) { + for (unsigned MCID : llvm::seq(0U, (unsigned)Insts.size())) { Expected> Inst = - IB.createInstruction(MCI); + IB.createInstruction(CE, MCID); if (!Inst) { if (auto NewE = handleErrors( Inst.takeError(),