Index: llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h +++ llvm/trunk/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -14,6 +14,7 @@ #ifndef LLVM_CODEGEN_TARGETSUBTARGETINFO_H #define LLVM_CODEGEN_TARGETSUBTARGETINFO_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -144,6 +145,31 @@ return 0; } + /// Returns true if \param MI is a dependency breaking zero-idiom instruction + /// for the subtarget. + /// + /// This function also sets bits in \param Mask related to input operands that + /// are not in a data dependency relationship. There is one bit for each + /// machine operand; implicit operands follow explicit operands in the bit + /// representation used for \param Mask. An empty \param Mask (i.e. a mask + /// with all bits cleared) means: data dependencies are "broken" for all the + /// explicit input machine operands of \param MI. + virtual bool isZeroIdiom(const MachineInstr *MI, APInt &Mask) const { + return false; + } + + /// Returns true if \param MI is a dependency breaking instruction for the + /// subtarget. + /// + /// Similar in behavior to `isZeroIdiom`. However, it knows how to identify + /// all dependency breaking instructions (i.e. not just zero-idioms). + /// + /// As for `isZeroIdiom`, this method returns a mask of "broken" dependencies. + /// (See method `isZeroIdiom` for a detailed description of \param Mask). + virtual bool isDependencyBreaking(const MachineInstr *MI, APInt &Mask) const { + return isZeroIdiom(MI, Mask); + } + /// True if the subtarget should run MachineScheduler after aggressive /// coalescing. 
/// Index: llvm/trunk/include/llvm/MC/MCInstrAnalysis.h =================================================================== --- llvm/trunk/include/llvm/MC/MCInstrAnalysis.h +++ llvm/trunk/include/llvm/MC/MCInstrAnalysis.h @@ -88,18 +88,53 @@ const MCInst &Inst, APInt &Writes) const; - /// Returns true if \param Inst is a dependency breaking instruction for the + /// Returns true if \param MI is a dependency breaking zero-idiom for the /// given subtarget. /// + /// \param Mask is used to identify input operands that have their dependency + /// broken. Each bit of the mask is associated with a specific input operand. + /// Bits associated with explicit input operands are laid out first in the + /// mask; implicit operands come after explicit operands. + /// + /// Dependencies are broken only for operands that have their corresponding bit + /// set. Operands that have their bit cleared, or that don't have a + /// corresponding bit in the mask don't have their dependency broken. + /// Note that \param Mask may not be big enough to describe all operands. + /// The assumption for operands that don't have a correspondent bit in the + /// mask is that those are still data dependent. + /// + /// The only exception to the rule is for when \param Mask has all zeroes. + /// A zero mask means: dependencies are broken for all explicit register + /// operands. + virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask, + unsigned CPUID) const { + return false; + } + + /// Returns true if \param MI is a dependency breaking instruction for the + /// subtarget associated with \param CPUID. + /// /// The value computed by a dependency breaking instruction is not dependent /// on the inputs. An example of dependency breaking instruction on X86 is /// `XOR %eax, %eax`. - /// TODO: In future, we could implement an alternative approach where this - /// method returns `true` if the input instruction is not dependent on - /// some/all of its input operands. 
An APInt mask could then be used to - /// identify independent operands. - virtual bool isDependencyBreaking(const MCSubtargetInfo &STI, - const MCInst &Inst) const; + /// + /// If \param MI is a dependency breaking instruction for subtarget \param + /// CPUID, then \param Mask can be inspected to identify independent operands. + /// + /// Essentially, each bit of the mask corresponds to an input operand. + /// Explicit operands are laid out first in the mask; implicit operands follow + /// explicit operands. Bits are set for operands that are independent. + /// + /// Note that the number of bits in Mask may not be equivalent to the sum of + /// explicit and implicit operands in \param MI. Operands that don't have a + /// corresponding bit in Mask are assumed "not independent". + /// + /// The only exception is for when \param Mask is all zeroes. That means: + /// explicit input operands of \param MI are independent. + virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask, + unsigned CPUID) const { + return isZeroIdiom(MI, Mask, CPUID); + } /// Given a branch instruction try to get the address the branch /// targets. Return true on success, and the address in Target. Index: llvm/trunk/include/llvm/Target/TargetInstrPredicate.td =================================================================== --- llvm/trunk/include/llvm/Target/TargetInstrPredicate.td +++ llvm/trunk/include/llvm/Target/TargetInstrPredicate.td @@ -68,6 +68,7 @@ // Forward declarations. class Instruction; +class SchedMachineModel; // A generic machine instruction predicate. class MCInstPredicate; @@ -230,3 +231,100 @@ string MCInstFnName = MCInstFn; string MachineInstrFnName = MachineInstrFn; } + +// Used to classify machine instructions based on a machine instruction +// predicate. +// +// Let IC be an InstructionEquivalenceClass definition, and MI a machine +// instruction. 
We say that MI belongs to the equivalence class described by IC +// if and only if the following two conditions are met: +// a) MI's opcode is in the `opcodes` set, and +// b) `Predicate` evaluates to true when applied to MI. +// +// Instances of this class can be used by processor scheduling models to +// describe instructions that have a property in common. For example, +// InstructionEquivalenceClass definitions can be used to identify the set of +// dependency breaking instructions for a processor model. +// +// An (optional) list of operand indices can be used to further describe +// properties that apply to instruction operands. For example, it can be used to +// identify register uses of a dependency breaking instructions that are not in +// a RAW dependency. +class InstructionEquivalenceClass opcodes, + MCInstPredicate pred, + list operands = []> { + list Opcodes = opcodes; + MCInstPredicate Predicate = pred; + list OperandIndices = operands; +} + +// Used by processor models to describe dependency breaking instructions. +// +// This is mainly an alias for InstructionEquivalenceClass. Input operand +// `BrokenDeps` identifies the set of "broken dependencies". There is one bit +// per each implicit and explicit input operand. An empty set of broken +// dependencies means: "explicit input register operands are independent." +class DepBreakingClass opcodes, MCInstPredicate pred, + list BrokenDeps = []> + : InstructionEquivalenceClass; + +// A function descriptor used to describe the signature of a predicate methods +// which will be expanded by the STIPredicateExpander into a tablegen'd +// XXXGenSubtargetInfo class member definition (here, XXX is a target name). +// +// It describes the signature of a TargetSubtarget hook, as well as a few extra +// properties. Examples of extra properties are: +// - The default return value for the auto-generate function hook. +// - A list of subtarget hooks (Delegates) that are called from this function. 
+// +class STIPredicateDecl delegates = []> { + string Name = name; + + MCInstPredicate DefaultReturnValue = default; + + // True if this method is declared as virtual in class TargetSubtargetInfo. + bit OverridesBaseClassMember = overrides; + + // True if we need an equivalent predicate function in the MC layer. + bit ExpandForMC = expandForMC; + + // True if the autogenerated method has an extra in/out APInt param used as a + // mask of operands. + bit UpdatesOpcodeMask = updatesOpcodeMask; + + // A list of STIPredicates used by this definition to delegate part of the + // computation. For example, STIPredicateFunction `isDependencyBreaking()` + // delegates to `isZeroIdiom()` part of its computation. + list Delegates = delegates; +} + +// A predicate function definition member of class `XXXGenSubtargetInfo`. +// +// If `Declaration.ExpandForMC` is true, then SubtargetEmitter +// will also expand another definition of this method that accepts an MCInst. +class STIPredicate classes> { + STIPredicateDecl Declaration = declaration; + list Classes = classes; + SchedMachineModel SchedModel = ?; +} + +// Convenience classes and definitions used by processor scheduling models to +// describe dependency breaking instructions. 
+let UpdatesOpcodeMask = 1 in { + +def IsZeroIdiomDecl : STIPredicateDecl<"isZeroIdiom">; + +let Delegates = [IsZeroIdiomDecl] in +def IsDepBreakingDecl : STIPredicateDecl<"isDependencyBreaking">; + +} // UpdatesOpcodeMask + +class IsZeroIdiomFunction classes> + : STIPredicate; + +class IsDepBreakingFunction classes> + : STIPredicate; Index: llvm/trunk/lib/MC/MCInstrAnalysis.cpp =================================================================== --- llvm/trunk/lib/MC/MCInstrAnalysis.cpp +++ llvm/trunk/lib/MC/MCInstrAnalysis.cpp @@ -24,11 +24,6 @@ return false; } -bool MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI, - const MCInst &Inst) const { - return false; -} - bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const { if (Inst.getNumOperands() == 0 || Index: llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp =================================================================== --- llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ llvm/trunk/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -380,8 +380,9 @@ public: X86MCInstrAnalysis(const MCInstrInfo *MCII) : MCInstrAnalysis(MCII) {} - bool isDependencyBreaking(const MCSubtargetInfo &STI, - const MCInst &Inst) const override; +#define GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS +#include "X86GenSubtargetInfo.inc" + bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Mask) const override; std::vector> @@ -390,77 +391,8 @@ const Triple &TargetTriple) const override; }; -bool X86MCInstrAnalysis::isDependencyBreaking(const MCSubtargetInfo &STI, - const MCInst &Inst) const { - if (STI.getCPU() == "btver2") { - // Reference: Agner Fog's microarchitecture.pdf - Section 20 "AMD Bobcat and - // Jaguar pipeline", subsection 8 "Dependency-breaking instructions". 
- switch (Inst.getOpcode()) { - default: - return false; - case X86::SUB32rr: - case X86::SUB64rr: - case X86::SBB32rr: - case X86::SBB64rr: - case X86::XOR32rr: - case X86::XOR64rr: - case X86::XORPSrr: - case X86::XORPDrr: - case X86::VXORPSrr: - case X86::VXORPDrr: - case X86::ANDNPSrr: - case X86::VANDNPSrr: - case X86::ANDNPDrr: - case X86::VANDNPDrr: - case X86::PXORrr: - case X86::VPXORrr: - case X86::PANDNrr: - case X86::VPANDNrr: - case X86::PSUBBrr: - case X86::PSUBWrr: - case X86::PSUBDrr: - case X86::PSUBQrr: - case X86::VPSUBBrr: - case X86::VPSUBWrr: - case X86::VPSUBDrr: - case X86::VPSUBQrr: - case X86::PCMPEQBrr: - case X86::PCMPEQWrr: - case X86::PCMPEQDrr: - case X86::PCMPEQQrr: - case X86::VPCMPEQBrr: - case X86::VPCMPEQWrr: - case X86::VPCMPEQDrr: - case X86::VPCMPEQQrr: - case X86::PCMPGTBrr: - case X86::PCMPGTWrr: - case X86::PCMPGTDrr: - case X86::PCMPGTQrr: - case X86::VPCMPGTBrr: - case X86::VPCMPGTWrr: - case X86::VPCMPGTDrr: - case X86::VPCMPGTQrr: - case X86::MMX_PXORirr: - case X86::MMX_PANDNirr: - case X86::MMX_PSUBBirr: - case X86::MMX_PSUBDirr: - case X86::MMX_PSUBQirr: - case X86::MMX_PSUBWirr: - case X86::MMX_PCMPGTBirr: - case X86::MMX_PCMPGTDirr: - case X86::MMX_PCMPGTWirr: - case X86::MMX_PCMPEQBirr: - case X86::MMX_PCMPEQDirr: - case X86::MMX_PCMPEQWirr: - return Inst.getOperand(1).getReg() == Inst.getOperand(2).getReg(); - case X86::CMP32rr: - case X86::CMP64rr: - return Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg(); - } - } - - return false; -} +#define GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS +#include "X86GenSubtargetInfo.inc" bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, Index: llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td +++ llvm/trunk/lib/Target/X86/X86ScheduleBtVer2.td @@ -687,4 +687,66 @@ def : InstRW<[JSlowLEA16r], (instrs LEA16r)>; 
+/////////////////////////////////////////////////////////////////////////////// +// Dependency breaking instructions. +/////////////////////////////////////////////////////////////////////////////// + +def : IsZeroIdiomFunction<[ + // GPR Zero-idioms. + DepBreakingClass<[ SUB32rr, SUB64rr, XOR32rr, XOR64rr ], ZeroIdiomPredicate>, + + // MMX Zero-idioms. + DepBreakingClass<[ + MMX_PXORirr, MMX_PANDNirr, MMX_PSUBBirr, + MMX_PSUBDirr, MMX_PSUBQirr, MMX_PSUBWirr, + MMX_PCMPGTBirr, MMX_PCMPGTDirr, MMX_PCMPGTWirr + ], ZeroIdiomPredicate>, + + // SSE Zero-idioms. + DepBreakingClass<[ + // fp variants. + XORPSrr, XORPDrr, ANDNPSrr, ANDNPDrr, + + // int variants. + PXORrr, PANDNrr, + PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr, + PCMPGTBrr, PCMPGTDrr, PCMPGTQrr, PCMPGTWrr + ], ZeroIdiomPredicate>, + + // AVX Zero-idioms. + DepBreakingClass<[ + // xmm fp variants. + VXORPSrr, VXORPDrr, VANDNPSrr, VANDNPDrr, + + // xmm int variants. + VPXORrr, VPANDNrr, + VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr, + VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr, + + // ymm variants. 
+ VXORPSYrr, VXORPDYrr, VANDNPSYrr, VANDNPDYrr + ], ZeroIdiomPredicate> +]>; + +def : IsDepBreakingFunction<[ + // GPR + DepBreakingClass<[ SBB32rr, SBB64rr ], ZeroIdiomPredicate>, + DepBreakingClass<[ CMP32rr, CMP64rr ], CheckSameRegOperand<0, 1> >, + + // MMX + DepBreakingClass<[ + MMX_PCMPEQBirr, MMX_PCMPEQDirr, MMX_PCMPEQWirr + ], ZeroIdiomPredicate>, + + // SSE + DepBreakingClass<[ + PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr + ], ZeroIdiomPredicate>, + + // AVX + DepBreakingClass<[ + VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr + ], ZeroIdiomPredicate> +]>; + } // SchedModel Index: llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s =================================================================== --- llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s +++ llvm/trunk/test/tools/llvm-mca/X86/BtVer2/zero-idioms-avx-256.s @@ -0,0 +1,322 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -timeline -timeline-max-iterations=3 < %s | FileCheck %s + +# TODO: Fix the processor resource usage for zero-idiom YMM XOR instructions. +# Those vector XOR instructions should only consume 1cy of JFPU1 (instead +# of 2cy). 
+ +# LLVM-MCA-BEGIN ZERO-IDIOM-1 + +vaddps %ymm0, %ymm0, %ymm1 +vxorps %ymm1, %ymm1, %ymm1 +vblendps $2, %ymm1, %ymm2, %ymm3 + +# LLVM-MCA-END + +# LLVM-MCA-BEGIN ZERO-IDIOM-2 + +vaddpd %ymm0, %ymm0, %ymm1 +vxorpd %ymm1, %ymm1, %ymm1 +vblendpd $2, %ymm1, %ymm2, %ymm3 + +# LLVM-MCA-END + +# LLVM-MCA-BEGIN ZERO-IDIOM-3 +vaddps %xmm0, %xmm1, %xmm2 +vandnps %xmm2, %xmm2, %xmm3 +# LLVM-MCA-END + +# LLVM-MCA-BEGIN ZERO-IDIOM-4 +vaddps %xmm0, %xmm1, %xmm2 +vandnps %xmm2, %xmm2, %xmm3 +# LLVM-MCA-END + +# CHECK: [0] Code Region - ZERO-IDIOM-1 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 300 +# CHECK-NEXT: Total Cycles: 306 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.96 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 3 2.00 vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 2 1 1.00 vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 2 1 1.00 vblendps $2, %ymm1, %ymm2, %ymm3 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddps %ymm0, %ymm0, 
%ymm1 +# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendps $2, %ymm1, %ymm2, %ymm3 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeER . . vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] .DeE-R . . vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,2] . DeE-R . . vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [1,0] . D=eeeER. . vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [1,1] . DeE--R. . vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [1,2] . D=eE-R . vblendps $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [2,0] . .DeeeER. vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [2,1] . . D=eER. vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [2,2] . . D=eER vblendps $2, %ymm1, %ymm2, %ymm3 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddps %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorps %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 2. 
3 1.7 0.3 0.7 vblendps $2, %ymm1, %ymm2, %ymm3 + +# CHECK: [1] Code Region - ZERO-IDIOM-2 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 300 +# CHECK-NEXT: Total Cycles: 306 +# CHECK-NEXT: Total uOps: 600 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.96 +# CHECK-NEXT: IPC: 0.98 +# CHECK-NEXT: Block RThroughput: 3.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 2 3 2.00 vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 2 1 1.00 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 2 1 1.00 vblendpd $2, %ymm1, %ymm2, %ymm3 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 3.00 3.00 3.00 3.00 - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: - - - - 2.00 - 2.00 - - - - - - - vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vblendpd $2, %ymm1, %ymm2, %ymm3 + +# CHECK: Timeline view: +# CHECK-NEXT: 012 +# CHECK-NEXT: Index 0123456789 + +# CHECK: [0,0] DeeeER . . vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [0,1] .DeE-R . . vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [0,2] . DeE-R . . vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [1,0] . 
D=eeeER. . vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [1,1] . DeE--R. . vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [1,2] . D=eE-R . vblendpd $2, %ymm1, %ymm2, %ymm3 +# CHECK-NEXT: [2,0] . .DeeeER. vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: [2,1] . . D=eER. vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: [2,2] . . D=eER vblendpd $2, %ymm1, %ymm2, %ymm3 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 1.3 1.3 0.0 vaddpd %ymm0, %ymm0, %ymm1 +# CHECK-NEXT: 1. 3 1.3 1.3 1.0 vxorpd %ymm1, %ymm1, %ymm1 +# CHECK-NEXT: 2. 3 1.7 0.3 0.7 vblendpd $2, %ymm1, %ymm2, %ymm3 + +# CHECK: [2] Code Region - ZERO-IDIOM-3 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 200 +# CHECK-NEXT: Total Cycles: 105 +# CHECK-NEXT: Total uOps: 200 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.90 +# CHECK-NEXT: IPC: 1.90 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 0 0.50 vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] 
[1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,1] D----R . vandnps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,0] .DeeeER. vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] .D----R. vandnps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,0] . DeeeER vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [2,1] . D----R vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 
3 0.0 0.0 4.0 vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: [3] Code Region - ZERO-IDIOM-4 + +# CHECK: Iterations: 100 +# CHECK-NEXT: Instructions: 200 +# CHECK-NEXT: Total Cycles: 105 +# CHECK-NEXT: Total uOps: 200 + +# CHECK: Dispatch Width: 2 +# CHECK-NEXT: uOps Per Cycle: 1.90 +# CHECK-NEXT: IPC: 1.90 +# CHECK-NEXT: Block RThroughput: 1.0 + +# CHECK: Instruction Info: +# CHECK-NEXT: [1]: #uOps +# CHECK-NEXT: [2]: Latency +# CHECK-NEXT: [3]: RThroughput +# CHECK-NEXT: [4]: MayLoad +# CHECK-NEXT: [5]: MayStore +# CHECK-NEXT: [6]: HasSideEffects (U) + +# CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 0 0.50 vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: Resources: +# CHECK-NEXT: [0] - JALU0 +# CHECK-NEXT: [1] - JALU1 +# CHECK-NEXT: [2] - JDiv +# CHECK-NEXT: [3] - JFPA +# CHECK-NEXT: [4] - JFPM +# CHECK-NEXT: [5] - JFPU0 +# CHECK-NEXT: [6] - JFPU1 +# CHECK-NEXT: [7] - JLAGU +# CHECK-NEXT: [8] - JMul +# CHECK-NEXT: [9] - JSAGU +# CHECK-NEXT: [10] - JSTC +# CHECK-NEXT: [11] - JVALU0 +# CHECK-NEXT: [12] - JVALU1 +# CHECK-NEXT: [13] - JVIMUL + +# CHECK: Resource pressure per iteration: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - + +# CHECK: Resource pressure by instruction: +# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# CHECK-NEXT: - - - 1.00 - 1.00 - - - - - - - - vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - - - - - - - - - - - - vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: Timeline view: +# CHECK-NEXT: Index 01234567 + +# CHECK: [0,0] DeeeER . vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [0,1] D----R . vandnps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [1,0] .DeeeER. vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [1,1] .D----R. vandnps %xmm2, %xmm2, %xmm3 +# CHECK-NEXT: [2,0] . DeeeER vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: [2,1] . 
D----R vandnps %xmm2, %xmm2, %xmm3 + +# CHECK: Average Wait times (based on the timeline view): +# CHECK-NEXT: [0]: Executions +# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue +# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready +# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage + +# CHECK: [0] [1] [2] [3] +# CHECK-NEXT: 0. 3 1.0 1.0 0.0 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1. 3 0.0 0.0 4.0 vandnps %xmm2, %xmm2, %xmm3 Index: llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp =================================================================== --- llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp +++ llvm/trunk/tools/llvm-mca/lib/InstrBuilder.cpp @@ -424,9 +424,12 @@ std::unique_ptr NewIS = llvm::make_unique(D); // Check if this is a dependency breaking instruction. - bool IsDepBreaking = MCIA.isDependencyBreaking(STI, MCI); - // FIXME: this is a temporary hack to identify zero-idioms. - bool IsZeroIdiom = D.isZeroLatency() && IsDepBreaking; + APInt Mask; + + unsigned ProcID = STI.getSchedModel().getProcessorID(); + bool IsZeroIdiom = MCIA.isZeroIdiom(MCI, Mask, ProcID); + bool IsDepBreaking = + IsZeroIdiom || MCIA.isDependencyBreaking(MCI, Mask, ProcID); // Initialize Reads first. for (const ReadDescriptor &RD : D.Reads) { @@ -451,8 +454,25 @@ assert(RegID > 0 && "Invalid register ID found!"); auto RS = llvm::make_unique(RD, RegID); - if (IsDepBreaking && !RD.isImplicitRead()) - RS->setIndependentFromDef(); + if (IsDepBreaking) { + // A mask of all zeroes means: all explicit input operands are + // independent. + if (Mask.isNullValue()) { + if (!RD.isImplicitRead()) + RS->setIndependentFromDef(); + } else { + // Check if this register operand is independent according to `Mask`. + // Note that Mask may not have enough bits to describe all explicit and + // implicit input operands. 
If this register operand doesn't have a + // corresponding bit in Mask, then conservatively assume that it is + // dependent. + if (Mask.getBitWidth() > RD.UseIndex) { + // Okay. This mask describes register use `RD.UseIndex`. + if (Mask[RD.UseIndex]) + RS->setIndependentFromDef(); + } + } + } NewIS->getUses().emplace_back(std::move(RS)); } Index: llvm/trunk/utils/TableGen/CodeGenSchedule.h =================================================================== --- llvm/trunk/utils/TableGen/CodeGenSchedule.h +++ llvm/trunk/utils/TableGen/CodeGenSchedule.h @@ -15,6 +15,7 @@ #ifndef LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H #define LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/ErrorHandling.h" @@ -270,6 +271,137 @@ #endif }; +/// Used to correlate instructions to MCInstPredicates specified by +/// InstructionEquivalenceClass tablegen definitions. +/// +/// Example: an XOR of a register with itself is a known zero-idiom for most +/// X86 processors. +/// +/// Each processor can use a (potentially different) InstructionEquivalenceClass +/// definition to classify zero-idioms. That means, XORrr is likely to appear +/// in more than one equivalence class (where each class definition is +/// contributed by a different processor). +/// +/// There is no guarantee that the same MCInstPredicate will be used to describe +/// equivalence classes that identify XORrr as a zero-idiom. +/// +/// To be more specific, the requirements for being a zero-idiom XORrr may be +/// different for different processors. +/// +/// Class PredicateInfo identifies a subset of processors that specify the same +/// requirements (i.e. same MCInstPredicate and OperandMask) for an instruction +/// opcode. +/// +/// Back to the example. Field `ProcModelMask` will have one bit set for every +/// processor model that sees XORrr as a zero-idiom, and that specifies the same +/// set of constraints. 
+/// +/// By construction, there can be multiple instances of PredicateInfo associated +/// with the same instruction opcode. For example, different processors may define +/// different constraints on the same opcode. +/// +/// Field OperandMask can be used as an extra constraint. +/// It may be used to describe conditions that apply only to a subset of the +/// operands of a machine instruction, and the operands subset may not be the +/// same for all processor models. +struct PredicateInfo { + llvm::APInt ProcModelMask; // A set of processor model indices. + llvm::APInt OperandMask; // An operand mask. + const Record *Predicate; // MCInstPredicate definition. + PredicateInfo(llvm::APInt CpuMask, llvm::APInt Operands, const Record *Pred) + : ProcModelMask(CpuMask), OperandMask(Operands), Predicate(Pred) {} + + bool operator==(const PredicateInfo &Other) const { + return ProcModelMask == Other.ProcModelMask && + OperandMask == Other.OperandMask && Predicate == Other.Predicate; + } +}; + +/// A collection of PredicateInfo objects. +/// +/// There is at least one OpcodeInfo object for every opcode specified by a +/// STIPredicate definition. +class OpcodeInfo { + llvm::SmallVector Predicates; + + OpcodeInfo(const OpcodeInfo &Other) = delete; + OpcodeInfo &operator=(const OpcodeInfo &Other) = delete; + +public: + OpcodeInfo() = default; + OpcodeInfo &operator=(OpcodeInfo &&Other) = default; + OpcodeInfo(OpcodeInfo &&Other) = default; + + ArrayRef getPredicates() const { return Predicates; } + + void addPredicateForProcModel(const llvm::APInt &CpuMask, + const llvm::APInt &OperandMask, + const Record *Predicate); +}; + +/// Used to group together tablegen instruction definitions that are subject +/// to the same set of constraints (identified by an instance of OpcodeInfo). 
+class OpcodeGroup { + OpcodeInfo Info; + std::vector Opcodes; + + OpcodeGroup(const OpcodeGroup &Other) = delete; + OpcodeGroup &operator=(const OpcodeGroup &Other) = delete; + +public: + OpcodeGroup(OpcodeInfo &&OpInfo) : Info(std::move(OpInfo)) {} + OpcodeGroup(OpcodeGroup &&Other) = default; + + void addOpcode(const Record *Opcode) { + assert(std::find(Opcodes.begin(), Opcodes.end(), Opcode) == Opcodes.end() && + "Opcode already in set!"); + Opcodes.push_back(Opcode); + } + + ArrayRef getOpcodes() const { return Opcodes; } + const OpcodeInfo &getOpcodeInfo() const { return Info; } +}; + +/// An STIPredicateFunction descriptor used by tablegen backends to +/// auto-generate the body of a predicate function as a member of tablegen'd +/// class XXXGenSubtargetInfo. +class STIPredicateFunction { + const Record *FunctionDeclaration; + + std::vector Definitions; + std::vector Groups; + + STIPredicateFunction(const STIPredicateFunction &Other) = delete; + STIPredicateFunction &operator=(const STIPredicateFunction &Other) = delete; + +public: + STIPredicateFunction(const Record *Rec) : FunctionDeclaration(Rec) {} + STIPredicateFunction(STIPredicateFunction &&Other) = default; + + bool isCompatibleWith(const STIPredicateFunction &Other) const { + return FunctionDeclaration == Other.FunctionDeclaration; + } + + void addDefinition(const Record *Def) { Definitions.push_back(Def); } + void addOpcode(const Record *OpcodeRec, OpcodeInfo &&Info) { + if (Groups.empty() || + Groups.back().getOpcodeInfo().getPredicates() != Info.getPredicates()) + Groups.emplace_back(std::move(Info)); + Groups.back().addOpcode(OpcodeRec); + } + + StringRef getName() const { + return FunctionDeclaration->getValueAsString("Name"); + } + const Record *getDefaultReturnPredicate() const { + return FunctionDeclaration->getValueAsDef("DefaultReturnValue"); + } + + const Record *getDeclaration() const { return FunctionDeclaration; } + ArrayRef getDefinitions() const { return Definitions; } + ArrayRef 
getGroups() const { return Groups; } +}; + /// Top level container for machine model data. class CodeGenSchedModels { RecordKeeper &Records; @@ -303,6 +435,8 @@ using InstClassMapTy = DenseMap; InstClassMapTy InstrClassMap; + std::vector STIPredicates; + public: CodeGenSchedModels(RecordKeeper& RK, const CodeGenTarget &TGT); @@ -430,6 +564,9 @@ Record *findProcResUnits(Record *ProcResKind, const CodeGenProcModel &PM, ArrayRef Loc) const; + ArrayRef getSTIPredicates() const { + return STIPredicates; + } private: void collectProcModels(); @@ -467,6 +604,10 @@ void checkMCInstPredicates() const; + void checkSTIPredicates() const; + + void collectSTIPredicates(); + void checkCompleteness(); void inferFromRW(ArrayRef OperWrites, ArrayRef OperReads, Index: llvm/trunk/utils/TableGen/CodeGenSchedule.cpp =================================================================== --- llvm/trunk/utils/TableGen/CodeGenSchedule.cpp +++ llvm/trunk/utils/TableGen/CodeGenSchedule.cpp @@ -225,9 +225,221 @@ // Check MCInstPredicate definitions. checkMCInstPredicates(); + // Check STIPredicate definitions. + checkSTIPredicates(); + + // Find STIPredicate definitions for each processor model, and construct + // STIPredicateFunction objects. + collectSTIPredicates(); + checkCompleteness(); } +void CodeGenSchedModels::checkSTIPredicates() const { + DenseMap Declarations; + + // There cannot be multiple declarations with the same name. 
+ const RecVec Decls = Records.getAllDerivedDefinitions("STIPredicateDecl"); + for (const Record *R : Decls) { + StringRef Name = R->getValueAsString("Name"); + const auto It = Declarations.find(Name); + if (It == Declarations.end()) { + Declarations[Name] = R; + continue; + } + + PrintError(R->getLoc(), "STIPredicate " + Name + " multiply declared."); + PrintNote(It->second->getLoc(), "Previous declaration was here."); + PrintFatalError(R->getLoc(), "Invalid STIPredicateDecl found."); + } + + // Disallow InstructionEquivalenceClasses with an empty instruction list. + const RecVec Defs = + Records.getAllDerivedDefinitions("InstructionEquivalenceClass"); + for (const Record *R : Defs) { + RecVec Opcodes = R->getValueAsListOfDefs("Opcodes"); + if (Opcodes.empty()) { + PrintFatalError(R->getLoc(), "Invalid InstructionEquivalenceClass " + "defined with an empty opcode list."); + } + } +} + +// Used by function `processSTIPredicate` to construct a mask of machine +// instruction operands. +static APInt constructOperandMask(ArrayRef Indices) { + APInt OperandMask; + if (Indices.empty()) + return OperandMask; + + int64_t MaxIndex = *std::max_element(Indices.begin(), Indices.end()); + assert(MaxIndex >= 0 && "Invalid negative indices in input!"); + OperandMask = OperandMask.zext(MaxIndex + 1); + for (const int64_t Index : Indices) { + assert(Index >= 0 && "Invalid negative indices!"); + OperandMask.setBit(Index); + } + + return OperandMask; +} + +static void +processSTIPredicate(STIPredicateFunction &Fn, + const DenseMap &ProcModelMap) { + DenseMap Opcode2Index; + using OpcodeMapPair = std::pair; + std::vector OpcodeMappings; + std::vector> OpcodeMasks; + + DenseMap Predicate2Index; + unsigned NumUniquePredicates = 0; + + // Number unique predicates and opcodes used by InstructionEquivalenceClass + // definitions. Each unique opcode will be associated with an OpcodeInfo + // object. 
+ for (const Record *Def : Fn.getDefinitions()) { + RecVec Classes = Def->getValueAsListOfDefs("Classes"); + for (const Record *EC : Classes) { + const Record *Pred = EC->getValueAsDef("Predicate"); + if (Predicate2Index.find(Pred) == Predicate2Index.end()) + Predicate2Index[Pred] = NumUniquePredicates++; + + RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes"); + for (const Record *Opcode : Opcodes) { + if (Opcode2Index.find(Opcode) == Opcode2Index.end()) { + Opcode2Index[Opcode] = OpcodeMappings.size(); + OpcodeMappings.emplace_back(Opcode, OpcodeInfo()); + } + } + } + } + + // Initialize vector `OpcodeMasks` with default values. We want to keep track + // of which processors "use" which opcodes. We also want to be able to + // identify predicates that are used by different processors for a same + // opcode. + // This information is used later on by this algorithm to sort OpcodeMapping + // elements based on their processor and predicate sets. + OpcodeMasks.resize(OpcodeMappings.size()); + APInt DefaultProcMask(ProcModelMap.size(), 0); + APInt DefaultPredMask(NumUniquePredicates, 0); + for (std::pair &MaskPair : OpcodeMasks) + MaskPair = std::make_pair(DefaultProcMask, DefaultPredMask); + + // Construct a OpcodeInfo object for every unique opcode declared by an + // InstructionEquivalenceClass definition. 
+ for (const Record *Def : Fn.getDefinitions()) { + RecVec Classes = Def->getValueAsListOfDefs("Classes"); + const Record *SchedModel = Def->getValueAsDef("SchedModel"); + unsigned ProcIndex = ProcModelMap.find(SchedModel)->second; + APInt ProcMask(ProcModelMap.size(), 0); + ProcMask.setBit(ProcIndex); + + for (const Record *EC : Classes) { + RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes"); + + std::vector OpIndices = + EC->getValueAsListOfInts("OperandIndices"); + APInt OperandMask = constructOperandMask(OpIndices); + + const Record *Pred = EC->getValueAsDef("Predicate"); + APInt PredMask(NumUniquePredicates, 0); + PredMask.setBit(Predicate2Index[Pred]); + + for (const Record *Opcode : Opcodes) { + unsigned OpcodeIdx = Opcode2Index[Opcode]; + if (OpcodeMasks[OpcodeIdx].first[ProcIndex]) { + std::string Message = + "Opcode " + Opcode->getName().str() + + " used by multiple InstructionEquivalenceClass definitions."; + PrintFatalError(EC->getLoc(), Message); + } + OpcodeMasks[OpcodeIdx].first |= ProcMask; + OpcodeMasks[OpcodeIdx].second |= PredMask; + OpcodeInfo &OI = OpcodeMappings[OpcodeIdx].second; + + OI.addPredicateForProcModel(ProcMask, OperandMask, Pred); + } + } + } + + // Sort OpcodeMappings elements based on their CPU and predicate masks. + // As a last resort, order elements by opcode identifier. 
+ llvm::sort(OpcodeMappings.begin(), OpcodeMappings.end(), + [&](const OpcodeMapPair &Lhs, const OpcodeMapPair &Rhs) { + unsigned LhsIdx = Opcode2Index[Lhs.first]; + unsigned RhsIdx = Opcode2Index[Rhs.first]; + std::pair &LhsMasks = OpcodeMasks[LhsIdx]; + std::pair &RhsMasks = OpcodeMasks[RhsIdx]; + + // Each mask is ranked by the key (population count, leading zeros) + // and keys are compared lexicographically. A key component decides + // the result only when it differs between Lhs and Rhs; otherwise + // both Lhs < Rhs and Rhs < Lhs could hold, and the comparator would + // not be a strict weak ordering. + unsigned LhsProcBits = LhsMasks.first.countPopulation(); + unsigned RhsProcBits = RhsMasks.first.countPopulation(); + if (LhsProcBits != RhsProcBits) + return LhsProcBits < RhsProcBits; + + unsigned LhsProcLZ = LhsMasks.first.countLeadingZeros(); + unsigned RhsProcLZ = RhsMasks.first.countLeadingZeros(); + if (LhsProcLZ != RhsProcLZ) + return LhsProcLZ > RhsProcLZ; + + unsigned LhsPredBits = LhsMasks.second.countPopulation(); + unsigned RhsPredBits = RhsMasks.second.countPopulation(); + if (LhsPredBits != RhsPredBits) + return LhsPredBits < RhsPredBits; + + unsigned LhsPredLZ = LhsMasks.second.countLeadingZeros(); + unsigned RhsPredLZ = RhsMasks.second.countLeadingZeros(); + if (LhsPredLZ != RhsPredLZ) + return LhsPredLZ > RhsPredLZ; + + // Every opcode has its own index, so this tie-break never reports + // two distinct elements as equivalent. + return LhsIdx < RhsIdx; + }); + + // Now construct opcode groups. Groups are used by the SubtargetEmitter when + // expanding the body of a STIPredicate function. In particular, each opcode + // group is expanded into a sequence of labels in a switch statement. + // It identifies opcodes for which different processors define same predicates + // and same opcode masks. + for (OpcodeMapPair &Info : OpcodeMappings) + Fn.addOpcode(Info.first, std::move(Info.second)); +} + +void CodeGenSchedModels::collectSTIPredicates() { + // Map STIPredicateDecl records to elements of vector + // CodeGenSchedModels::STIPredicates. 
+ DenseMap Decl2Index; + + RecVec RV = Records.getAllDerivedDefinitions("STIPredicate"); + for (const Record *R : RV) { + const Record *Decl = R->getValueAsDef("Declaration"); + + const auto It = Decl2Index.find(Decl); + if (It == Decl2Index.end()) { + Decl2Index[Decl] = STIPredicates.size(); + STIPredicateFunction Predicate(Decl); + Predicate.addDefinition(R); + STIPredicates.emplace_back(std::move(Predicate)); + continue; + } + + STIPredicateFunction &PreviousDef = STIPredicates[It->second]; + PreviousDef.addDefinition(R); + } + + for (STIPredicateFunction &Fn : STIPredicates) + processSTIPredicate(Fn, ProcModelMap); +} + +void OpcodeInfo::addPredicateForProcModel(const llvm::APInt &CpuMask, + const llvm::APInt &OperandMask, + const Record *Predicate) { + auto It = llvm::find_if( + Predicates, [&OperandMask, &Predicate](const PredicateInfo &P) { + return P.Predicate == Predicate && P.OperandMask == OperandMask; + }); + if (It == Predicates.end()) { + Predicates.emplace_back(CpuMask, OperandMask, Predicate); + return; + } + It->ProcModelMask |= CpuMask; +} + void CodeGenSchedModels::checkMCInstPredicates() const { RecVec MCPredicates = Records.getAllDerivedDefinitions("TIIPredicate"); if (MCPredicates.empty()) Index: llvm/trunk/utils/TableGen/PredicateExpander.h =================================================================== --- llvm/trunk/utils/TableGen/PredicateExpander.h +++ llvm/trunk/utils/TableGen/PredicateExpander.h @@ -43,14 +43,15 @@ bool shouldNegate() const { return NegatePredicate; } bool shouldExpandForMC() const { return ExpandForMC; } unsigned getIndentLevel() const { return IndentLevel; } + StringRef getTargetName() const { return TargetName; } void setByRef(bool Value) { EmitCallsByRef = Value; } void flipNegatePredicate() { NegatePredicate = !NegatePredicate; } void setNegatePredicate(bool Value) { NegatePredicate = Value; } void setExpandForMC(bool Value) { ExpandForMC = Value; } + void setIndentLevel(unsigned Level) { IndentLevel = Level; } 
void increaseIndentLevel() { ++IndentLevel; } void decreaseIndentLevel() { --IndentLevel; } - void setIndentLevel(unsigned Level) { IndentLevel = Level; } using RecVec = std::vector; void expandTrue(raw_ostream &OS); @@ -81,6 +82,36 @@ void expandStatement(raw_ostream &OS, const Record *Rec); }; +// Forward declarations. +class STIPredicateFunction; +class OpcodeGroup; + +/// A PredicateExpander that emits the declaration of an STIPredicateFunction +/// and, when ExpandDefinition is set, its definition. ClassPrefix is the class +/// name used to qualify the emitted definition. +class STIPredicateExpander : public PredicateExpander { + StringRef ClassPrefix; + bool ExpandDefinition; + + // Instances are not copyable. + STIPredicateExpander(const STIPredicateExpander &) = delete; + STIPredicateExpander &operator=(const STIPredicateExpander &) = delete; + + void expandHeader(raw_ostream &OS, const STIPredicateFunction &Fn); + void expandPrologue(raw_ostream &OS, const STIPredicateFunction &Fn); + void expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group, + bool ShouldUpdateOpcodeMask); + void expandBody(raw_ostream &OS, const STIPredicateFunction &Fn); + void expandEpilogue(raw_ostream &OS, const STIPredicateFunction &Fn); + +public: + STIPredicateExpander(StringRef Target) + : PredicateExpander(Target), ClassPrefix(), ExpandDefinition(false) {} + + bool shouldExpandDefinition() const { return ExpandDefinition; } + StringRef getClassPrefix() const { return ClassPrefix; } + void setClassPrefix(StringRef S) { ClassPrefix = S; } + void setExpandDefinition(bool Value) { ExpandDefinition = Value; } + + void expandSTIPredicate(raw_ostream &OS, const STIPredicateFunction &Fn); +}; + } // namespace llvm #endif Index: llvm/trunk/utils/TableGen/PredicateExpander.cpp =================================================================== --- llvm/trunk/utils/TableGen/PredicateExpander.cpp +++ llvm/trunk/utils/TableGen/PredicateExpander.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "PredicateExpander.h" +#include "CodeGenSchedule.h" // Definition of STIPredicateFunction. 
namespace llvm { @@ -313,4 +314,158 @@ llvm_unreachable("No known rules to expand this MCInstPredicate"); } +void STIPredicateExpander::expandHeader(raw_ostream &OS, + const STIPredicateFunction &Fn) { + const Record *Rec = Fn.getDeclaration(); + StringRef FunctionName = Rec->getValueAsString("Name"); + + OS.indent(getIndentLevel() * 2); + OS << "bool "; + if (shouldExpandDefinition()) + OS << getClassPrefix() << "::"; + OS << FunctionName << "("; + if (shouldExpandForMC()) + OS << "const MCInst " << (isByRef() ? "&" : "*") << "MI"; + else + OS << "const MachineInstr " << (isByRef() ? "&" : "*") << "MI"; + if (Rec->getValueAsBit("UpdatesOpcodeMask")) + OS << ", APInt &Mask"; + OS << (shouldExpandForMC() ? ", unsigned ProcessorID) const " : ") const "); + if (shouldExpandDefinition()) { + OS << "{\n"; + return; + } + + if (Rec->getValueAsBit("OverridesBaseClassMember")) + OS << "override"; + OS << ";\n"; +} + +void STIPredicateExpander::expandPrologue(raw_ostream &OS, + const STIPredicateFunction &Fn) { + RecVec Delegates = Fn.getDeclaration()->getValueAsListOfDefs("Delegates"); + bool UpdatesOpcodeMask = + Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); + + increaseIndentLevel(); + unsigned IndentLevel = getIndentLevel(); + for (const Record *Delegate : Delegates) { + OS.indent(IndentLevel * 2); + OS << "if (" << Delegate->getValueAsString("Name") << "(MI"; + if (UpdatesOpcodeMask) + OS << ", Mask"; + if (shouldExpandForMC()) + OS << ", ProcessorID"; + OS << "))\n"; + OS.indent((1 + IndentLevel) * 2); + OS << "return true;\n\n"; + } + + if (shouldExpandForMC()) + return; + + OS.indent(IndentLevel * 2); + OS << "unsigned ProcessorID = getSchedModel().getProcessorID();\n"; +} + +void STIPredicateExpander::expandOpcodeGroup(raw_ostream &OS, const OpcodeGroup &Group, + bool ShouldUpdateOpcodeMask) { + const OpcodeInfo &OI = Group.getOpcodeInfo(); + for (const PredicateInfo &PI : OI.getPredicates()) { + const APInt &ProcModelMask = PI.ProcModelMask; + bool 
FirstProcID = true; + for (unsigned I = 0, E = ProcModelMask.getActiveBits(); I < E; ++I) { + if (!ProcModelMask[I]) + continue; + + if (FirstProcID) { + OS.indent(getIndentLevel() * 2); + OS << "if (ProcessorID == " << I; + } else { + OS << " || ProcessorID == " << I; + } + FirstProcID = false; + } + + OS << ") {\n"; + + increaseIndentLevel(); + OS.indent(getIndentLevel() * 2); + if (ShouldUpdateOpcodeMask) { + if (PI.OperandMask.isNullValue()) + OS << "Mask.clearAllBits();\n"; + else + OS << "Mask = " << PI.OperandMask << ";\n"; + OS.indent(getIndentLevel() * 2); + } + OS << "return "; + expandPredicate(OS, PI.Predicate); + OS << ";\n"; + decreaseIndentLevel(); + OS.indent(getIndentLevel() * 2); + OS << "}\n"; + } +} + +void STIPredicateExpander::expandBody(raw_ostream &OS, + const STIPredicateFunction &Fn) { + bool UpdatesOpcodeMask = + Fn.getDeclaration()->getValueAsBit("UpdatesOpcodeMask"); + + unsigned IndentLevel = getIndentLevel(); + OS.indent(IndentLevel * 2); + OS << "switch(MI" << (isByRef() ? "." 
: "->") << "getOpcode()) {\n"; + OS.indent(IndentLevel * 2); + OS << "default:\n"; + OS.indent(IndentLevel * 2); + OS << " break;"; + + for (const OpcodeGroup &Group : Fn.getGroups()) { + for (const Record *Opcode : Group.getOpcodes()) { + OS << '\n'; + OS.indent(IndentLevel * 2); + OS << "case " << getTargetName() << "::" << Opcode->getName() << ":"; + } + + OS << '\n'; + increaseIndentLevel(); + expandOpcodeGroup(OS, Group, UpdatesOpcodeMask); + + OS.indent(getIndentLevel() * 2); + OS << "break;\n"; + decreaseIndentLevel(); + } + + OS.indent(IndentLevel * 2); + OS << "}\n"; +} + +void STIPredicateExpander::expandEpilogue(raw_ostream &OS, + const STIPredicateFunction &Fn) { + OS << '\n'; + OS.indent(getIndentLevel() * 2); + OS << "return "; + expandPredicate(OS, Fn.getDefaultReturnPredicate()); + OS << ";\n"; + + decreaseIndentLevel(); + OS.indent(getIndentLevel() * 2); + StringRef FunctionName = Fn.getDeclaration()->getValueAsString("Name"); + OS << "} // " << ClassPrefix << "::" << FunctionName << "\n\n"; +} + +void STIPredicateExpander::expandSTIPredicate(raw_ostream &OS, + const STIPredicateFunction &Fn) { + const Record *Rec = Fn.getDeclaration(); + if (shouldExpandForMC() && !Rec->getValueAsBit("ExpandForMC")) + return; + + expandHeader(OS, Fn); + if (shouldExpandDefinition()) { + expandPrologue(OS, Fn); + expandBody(OS, Fn); + expandEpilogue(OS, Fn); + } +} + } // namespace llvm Index: llvm/trunk/utils/TableGen/SubtargetEmitter.cpp =================================================================== --- llvm/trunk/utils/TableGen/SubtargetEmitter.cpp +++ llvm/trunk/utils/TableGen/SubtargetEmitter.cpp @@ -116,6 +116,7 @@ void emitSchedModelHelpersImpl(raw_ostream &OS, bool OnlyExpandMCInstPredicates = false); void emitGenMCSubtargetInfo(raw_ostream &OS); + void EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS); void EmitSchedModel(raw_ostream &OS); void EmitHwModeCheck(const std::string &ClassName, raw_ostream &OS); @@ -1672,7 +1673,16 @@ << " unsigned 
CPUID) const {\n" << " return " << Target << "_MC" << "::resolveVariantSchedClassImpl(SchedClass, MI, CPUID);\n" - << "} // " << ClassName << "::resolveVariantSchedClass\n"; + << "} // " << ClassName << "::resolveVariantSchedClass\n\n"; + + STIPredicateExpander PE(Target); + PE.setClassPrefix(ClassName); + PE.setExpandDefinition(true); + PE.setByRef(false); + PE.setIndentLevel(0); + + for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) + PE.expandSTIPredicate(OS, Fn); } void SubtargetEmitter::EmitHwModeCheck(const std::string &ClassName, @@ -1766,6 +1776,31 @@ OS << "};\n"; } +void SubtargetEmitter::EmitMCInstrAnalysisPredicateFunctions(raw_ostream &OS) { + OS << "\n#ifdef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n"; + OS << "#undef GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n"; + + STIPredicateExpander PE(Target); + PE.setExpandForMC(true); + PE.setByRef(true); + for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) + PE.expandSTIPredicate(OS, Fn); + + OS << "#endif // GET_STIPREDICATE_DECLS_FOR_MC_ANALYSIS\n\n"; + + OS << "\n#ifdef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n"; + OS << "#undef GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n"; + + std::string ClassPrefix = Target + "MCInstrAnalysis"; + PE.setExpandDefinition(true); + PE.setClassPrefix(ClassPrefix); + PE.setIndentLevel(0); + for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) + PE.expandSTIPredicate(OS, Fn); + + OS << "#endif // GET_STIPREDICATE_DEFS_FOR_MC_ANALYSIS\n\n"; +} + // // SubtargetEmitter::run - Main subtarget enumeration emitter. 
// @@ -1863,6 +1898,12 @@ << " const;\n"; if (TGT.getHwModes().getNumModeIds() > 1) OS << " unsigned getHwMode() const override;\n"; + + STIPredicateExpander PE(Target); + PE.setByRef(false); + for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) + PE.expandSTIPredicate(OS, Fn); + OS << "};\n" << "} // end namespace llvm\n\n"; @@ -1920,6 +1961,8 @@ OS << "} // end namespace llvm\n\n"; OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n"; + + EmitMCInstrAnalysisPredicateFunctions(OS); } namespace llvm {