Index: lib/Target/X86/CMakeLists.txt =================================================================== --- lib/Target/X86/CMakeLists.txt +++ lib/Target/X86/CMakeLists.txt @@ -24,7 +24,6 @@ X86FrameLowering.cpp X86ISelDAGToDAG.cpp X86ISelLowering.cpp - X86InstrFMA3Info.cpp X86InstrInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp Index: lib/Target/X86/MCTargetDesc/X86BaseInfo.h =================================================================== --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -51,6 +51,13 @@ TO_ZERO = 3, CUR_DIRECTION = 4 }; + + /// FMA3 form constants. + enum { + FMA3Form132 = 0, + FMA3Form213 = 1, + FMA3Form231 = 2, + }; } // end namespace X86; /// X86II - This namespace holds all of the target specific flags that @@ -553,9 +560,47 @@ /// Explicitly specified rounding control EVEX_RCShift = Has3DNow0F0FOpcodeShift + 1, - EVEX_RC = 1ULL << EVEX_RCShift + EVEX_RC = 1ULL << EVEX_RCShift, + + /// FMA3Form - If this is an FMA3 instruction, indicates whether this is + /// the 132, 213, or 231 form. 0 means this is not an FMA3 instruction. + FMA3FormShift = EVEX_RCShift + 1, + FMA3FormMask = 3ULL << FMA3FormShift, + FMA3_132 = (uint64_t)(X86::FMA3Form132 + 1) << FMA3FormShift, + FMA3_213 = (uint64_t)(X86::FMA3Form213 + 1) << FMA3FormShift, + FMA3_231 = (uint64_t)(X86::FMA3Form231 + 1) << FMA3FormShift, + + /// FMA3Intrinsic - Set if this is an FMA3 scalar intrinsic instruction. + FMA3IntrinsicShift = FMA3FormShift + 2, + FMA3IntrinsicMask = 1ULL << FMA3IntrinsicShift, }; + /// isFMA3 - Is this an FMA3 instruction. + inline bool isFMA3(uint64_t TSFlags) { + return (TSFlags & X86II::FMA3FormMask) != 0; + } + + /// getFMA3Form - Returns whether this is a 132, 213, or 231 FMA3 form. 
+ inline unsigned getFMA3Form(uint64_t TSFlags) { + assert(isFMA3(TSFlags) && "Not an FMA3 instruction?"); + return ((TSFlags & X86II::FMA3FormMask) >> FMA3FormShift) - 1; + } + + /// isFMA3Intrinsic - Is this an FMA3 scalar intrinsic instruction. + inline bool isFMA3Intrinsic(uint64_t TSFlags) { + return (TSFlags & X86II::FMA3IntrinsicMask) != 0; + } + + /// isKMasked - Is this a masked instruction. + inline bool isKMasked(uint64_t TSFlags) { + return (TSFlags & X86II::EVEX_K) != 0; + } + + /// isKMergeMasked - Is this a merge-masked instruction. + inline bool isKMergeMasked(uint64_t TSFlags) { + return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0; + } + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -4939,9 +4939,9 @@ multiclass avx512_fma3p_213_f opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd > { defm PS : avx512_fma3p_213_common; + avx512vl_f32_info, "PS">, FMA3_213; defm PD : avx512_fma3p_213_common, VEX_W; + avx512vl_f64_info, "PD">, VEX_W, FMA3_213; } defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>; @@ -5024,9 +5024,9 @@ multiclass avx512_fma3p_231_f opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd > { defm PS : avx512_fma3p_231_common; + avx512vl_f32_info, "PS">, FMA3_231; defm PD : avx512_fma3p_231_common, VEX_W; + avx512vl_f64_info, "PD">, VEX_W, FMA3_231; } defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>; @@ -5098,9 +5098,9 @@ multiclass avx512_fma3p_132_f opc, string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd > { defm PS : avx512_fma3p_132_common; + avx512vl_f32_info, "PS">, FMA3_132; defm PD : avx512_fma3p_132_common, VEX_W; + avx512vl_f64_info, "PD">, VEX_W, FMA3_132; } defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", 
X86Fmadd, X86FmaddRnd>; @@ -5115,6 +5115,7 @@ multiclass avx512_fma3s_common opc, string OpcodeStr, X86VectorVTInfo _, dag RHS_VEC_r, dag RHS_VEC_m, dag RHS_VEC_rb, dag RHS_r, dag RHS_m > { + let FMA3Intrinsic = 1 in { defm r_Int: AVX512_maskable_3src_scalar, AVX512FMA3Base; @@ -5127,6 +5128,7 @@ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc), OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>, AVX512FMA3Base, EVEX_B, EVEX_RC; + } let isCodeGenOnly = 1, isCommutable = 1 in { def r : AVX512FMA3; + (_.ScalarLdFrag addr:$src3))))>, FMA3_213; defm NAME#231#SUFF#Z: avx512_fma3s_common; + (_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>, + FMA3_231; defm NAME#132#SUFF#Z: avx512_fma3s_common; + (_.ScalarLdFrag addr:$src3), _.FRC:$src2)))>, + FMA3_132; } multiclass avx512_fma3s opc213, bits<8> opc231, bits<8> opc132, Index: lib/Target/X86/X86InstrFMA.td =================================================================== --- lib/Target/X86/X86InstrFMA.td +++ lib/Target/X86/X86InstrFMA.td @@ -77,13 +77,16 @@ SDNode Op, ValueType OpTy128, ValueType OpTy256> { defm NAME#213#Suff : fma3p_rm; + MemFrag128, MemFrag256, OpTy128, OpTy256, Op>, + FMA3_213; defm NAME#132#Suff : fma3p_rm; + MemFrag128, MemFrag256, OpTy128, OpTy256>, + FMA3_132; defm NAME#231#Suff : fma3p_rm; + MemFrag128, MemFrag256, OpTy128, OpTy256>, + FMA3_231; } // Fused Multiply-Add @@ -171,7 +174,7 @@ // may be not implemented yet we allow the routines doing the actual commute // transformation to decide if one or another instruction is commutable or not. 
let Constraints = "$src1 = $dst", isCommutable = 1, isCodeGenOnly = 1, - hasSideEffects = 0 in + hasSideEffects = 0, FMA3Intrinsic = 1 in multiclass fma3s_rm_int opc, string OpcodeStr, Operand memopr, RegisterClass RC> { def r_Int : FMA3 { defm NAME#132#Suff : fma3s_rm; + x86memop, RC>, FMA3_132; defm NAME#213#Suff : fma3s_rm; + x86memop, RC, OpNode>, FMA3_213; defm NAME#231#Suff : fma3s_rm; + x86memop, RC>, FMA3_231; } // The FMA 213 form is created for lowering of scalar FMA intrinscis @@ -213,11 +216,11 @@ string OpStr, string PackTy, string Suff, RegisterClass RC, Operand memop> { defm NAME#132#Suff : fma3s_rm_int; + memop, RC>, FMA3_132; defm NAME#213#Suff : fma3s_rm_int; + memop, RC>, FMA3_213; defm NAME#231#Suff : fma3s_rm_int; + memop, RC>, FMA3_231; } multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, Index: lib/Target/X86/X86InstrFMA3Info.h =================================================================== --- lib/Target/X86/X86InstrFMA3Info.h +++ lib/Target/X86/X86InstrFMA3Info.h @@ -7,309 +7,209 @@ // //===----------------------------------------------------------------------===// // -// This file contains the implementation of the classes providing information -// about existing X86 FMA3 opcodes, classifying and grouping them. +// This file contains tables that group FMA3 instructions together. // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H -#define LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H +#ifndef LLVM_LIB_TARGET_X86_X86INSTRFMA3INFO_H +#define LLVM_LIB_TARGET_X86_X86INSTRFMA3INFO_H -#include "X86.h" -#include "llvm/ADT/DenseMap.h" +#include "MCTargetDesc/X86BaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include #include -#include using namespace llvm; -/// This class is used to group {132, 213, 231} forms of FMA opcodes together. 
-/// Each of the groups has either 3 register opcodes, 3 memory opcodes, -/// or 6 register and memory opcodes. Also, each group has an attrubutes field -/// describing it. -class X86InstrFMA3Group { -private: - /// Reference to an array holding 3 forms of register FMA opcodes. - /// It may be set to nullptr if the group of FMA opcodes does not have - /// any register form opcodes. - const uint16_t *RegOpcodes; - - /// Reference to an array holding 3 forms of memory FMA opcodes. - /// It may be set to nullptr if the group of FMA opcodes does not have - /// any register form opcodes. - const uint16_t *MemOpcodes; - - /// This bitfield specifies the attributes associated with the created - /// FMA groups of opcodes. - unsigned Attributes; - - static const unsigned Form132 = 0; - static const unsigned Form213 = 1; - static const unsigned Form231 = 2; - -public: - /// This bit must be set in the 'Attributes' field of FMA group if such - /// group of FMA opcodes consists of FMA intrinsic opcodes. - static const unsigned X86FMA3Intrinsic = 0x1; - - /// This bit must be set in the 'Attributes' field of FMA group if such - /// group of FMA opcodes consists of AVX512 opcodes accepting a k-mask and - /// passing the elements from the 1st operand to the result of the operation - /// when the correpondings bits in the k-mask are unset. - static const unsigned X86FMA3KMergeMasked = 0x2; - - /// This bit must be set in the 'Attributes' field of FMA group if such - /// group of FMA opcodes consists of AVX512 opcodes accepting a k-zeromask. - static const unsigned X86FMA3KZeroMasked = 0x4; - - /// Constructor. Creates a new group of FMA opcodes with three register form - /// FMA opcodes \p RegOpcodes and three memory form FMA opcodes \p MemOpcodes. - /// The parameters \p RegOpcodes and \p MemOpcodes may be set to nullptr, - /// which means that the created group of FMA opcodes does not have the - /// corresponding (register or memory) opcodes. 
- /// The parameter \p Attr specifies the attributes describing the created - /// group. - X86InstrFMA3Group(const uint16_t *RegOpcodes, const uint16_t *MemOpcodes, - unsigned Attr) - : RegOpcodes(RegOpcodes), MemOpcodes(MemOpcodes), Attributes(Attr) { - assert((RegOpcodes || MemOpcodes) && - "Cannot create a group not having any opcodes."); - } - - /// Returns a memory form opcode that is the equivalent of the given register - /// form opcode \p RegOpcode. 0 is returned if the group does not have - /// either register of memory opcodes. - unsigned getMemOpcode(unsigned RegOpcode) const { - if (!RegOpcodes || !MemOpcodes) - return 0; - for (unsigned Form = 0; Form < 3; Form++) - if (RegOpcodes[Form] == RegOpcode) - return MemOpcodes[Form]; - return 0; - } - - /// Returns the 132 form of FMA register opcode. - unsigned getReg132Opcode() const { - assert(RegOpcodes && "The group does not have register opcodes."); - return RegOpcodes[Form132]; - } - - /// Returns the 213 form of FMA register opcode. - unsigned getReg213Opcode() const { - assert(RegOpcodes && "The group does not have register opcodes."); - return RegOpcodes[Form213]; - } - - /// Returns the 231 form of FMA register opcode. - unsigned getReg231Opcode() const { - assert(RegOpcodes && "The group does not have register opcodes."); - return RegOpcodes[Form231]; - } - - /// Returns the 132 form of FMA memory opcode. - unsigned getMem132Opcode() const { - assert(MemOpcodes && "The group does not have memory opcodes."); - return MemOpcodes[Form132]; - } - - /// Returns the 213 form of FMA memory opcode. - unsigned getMem213Opcode() const { - assert(MemOpcodes && "The group does not have memory opcodes."); - return MemOpcodes[Form213]; - } - - /// Returns the 231 form of FMA memory opcode. - unsigned getMem231Opcode() const { - assert(MemOpcodes && "The group does not have memory opcodes."); - return MemOpcodes[Form231]; - } - - /// Returns true iff the group of FMA opcodes holds intrinsic opcodes. 
- bool isIntrinsic() const { return (Attributes & X86FMA3Intrinsic) != 0; } - - /// Returns true iff the group of FMA opcodes holds k-merge-masked opcodes. - bool isKMergeMasked() const { - return (Attributes & X86FMA3KMergeMasked) != 0; - } - - /// Returns true iff the group of FMA opcodes holds k-zero-masked opcodes. - bool isKZeroMasked() const { return (Attributes & X86FMA3KZeroMasked) != 0; } - - /// Returns true iff the group of FMA opcodes holds any of k-masked opcodes. - bool isKMasked() const { - return (Attributes & (X86FMA3KMergeMasked | X86FMA3KZeroMasked)) != 0; - } - - /// Returns true iff the given \p Opcode is a register opcode from the - /// groups of FMA opcodes. - bool isRegOpcodeFromGroup(unsigned Opcode) const { - if (!RegOpcodes) - return false; - for (unsigned Form = 0; Form < 3; Form++) - if (Opcode == RegOpcodes[Form]) - return true; - return false; - } - - /// Returns true iff the given \p Opcode is a memory opcode from the - /// groups of FMA opcodes. - bool isMemOpcodeFromGroup(unsigned Opcode) const { - if (!MemOpcodes) - return false; - for (unsigned Form = 0; Form < 3; Form++) - if (Opcode == MemOpcodes[Form]) - return true; - return false; - } +#define FMA3SET(Name, Suffix) \ + { { X86::Name##132##Suffix, X86::Name##213##Suffix, X86::Name##231##Suffix } }, + +#define FMA3_PACKED_SIZES_AVX512(Name, Type, Suffix) \ + FMA3SET(Name, Type##Z128##Suffix) \ + FMA3SET(Name, Type##Z128##Suffix##k) \ + FMA3SET(Name, Type##Z128##Suffix##kz) \ + FMA3SET(Name, Type##Z256##Suffix) \ + FMA3SET(Name, Type##Z256##Suffix##k) \ + FMA3SET(Name, Type##Z256##Suffix##kz) \ + FMA3SET(Name, Type##Z##Suffix) \ + FMA3SET(Name, Type##Z##Suffix##k) \ + FMA3SET(Name, Type##Z##Suffix##kz) + +#define FMA3_PACKED_GROUP_AVX512(Name, Suffix) \ + FMA3_PACKED_SIZES_AVX512(Name, PD, Suffix) \ + FMA3_PACKED_SIZES_AVX512(Name, PS, Suffix) + +#define FMA3_FULL_GROUP_AVX512(Name, Suffix) \ + FMA3_PACKED_GROUP_AVX512(Name, Suffix) + +#define FMA3_PACKED_SIZES_MASKED(Name, 
Type, Suffix) \ + FMA3SET(Name, Type##Z128##Suffix##k) \ + FMA3SET(Name, Type##Z128##Suffix##kz) \ + FMA3SET(Name, Type##Z256##Suffix##k) \ + FMA3SET(Name, Type##Z256##Suffix##kz) \ + FMA3SET(Name, Type##Z##Suffix##k) \ + FMA3SET(Name, Type##Z##Suffix##kz) + +#define FMA3_PACKED_GROUP_MASKED(Name, Suffix) \ + FMA3_PACKED_SIZES_MASKED(Name, PD, Suffix) \ + FMA3_PACKED_SIZES_MASKED(Name, PS, Suffix) + +#define FMA3_FULL_GROUP_MASKED(Name, Suffix) \ + FMA3_PACKED_GROUP_MASKED(Name, Suffix) \ + FMA3SET(Name, SDZ##Suffix##_Intk) \ + FMA3SET(Name, SDZ##Suffix##_Intkz) \ + FMA3SET(Name, SSZ##Suffix##_Intk) \ + FMA3SET(Name, SSZ##Suffix##_Intkz) + +#define FMA3_OPCODES_MASKED(Suffix) \ + FMA3_FULL_GROUP_MASKED(VFMADD, Suffix) \ + FMA3_PACKED_GROUP_MASKED(VFMADDSUB, Suffix) \ + FMA3_FULL_GROUP_MASKED(VFMSUB, Suffix) \ + FMA3_PACKED_GROUP_MASKED(VFMSUBADD, Suffix) \ + FMA3_FULL_GROUP_MASKED(VFNMADD, Suffix) \ + FMA3_FULL_GROUP_MASKED(VFNMSUB, Suffix) + +#define FMA3_PACKED_GROUP_ROUND(Name, Suffix) \ + FMA3SET(Name, PDZ##Suffix) \ + FMA3SET(Name, PDZ##Suffix##k) \ + FMA3SET(Name, PDZ##Suffix##kz) \ + FMA3SET(Name, PSZ##Suffix) \ + FMA3SET(Name, PSZ##Suffix##k) \ + FMA3SET(Name, PSZ##Suffix##kz) + +#define FMA3_FULL_GROUP_ROUND(Name, Suffix) \ + FMA3_PACKED_GROUP_ROUND(Name, Suffix) \ + FMA3SET(Name, SDZ##Suffix##_Int) \ + FMA3SET(Name, SDZ##Suffix##_Intk) \ + FMA3SET(Name, SDZ##Suffix##_Intkz) \ + FMA3SET(Name, SSZ##Suffix##_Int) \ + FMA3SET(Name, SSZ##Suffix##_Intk) \ + FMA3SET(Name, SSZ##Suffix##_Intkz) + +#define FMA3_PACKED_SIZES(Name, Type, Suffix) \ + FMA3SET(Name, Type##Y##Suffix) \ + FMA3SET(Name, Type##Z128##Suffix) \ + FMA3SET(Name, Type##Z256##Suffix) \ + FMA3SET(Name, Type##Z##Suffix) \ + FMA3SET(Name, Type##Suffix) + +#define FMA3_SCALAR_SIZES(Name, Type, Suffix) \ + FMA3SET(Name, Type##Z##Suffix) \ + FMA3SET(Name, Type##Z##Suffix##_Int) \ + FMA3SET(Name, Type##Suffix) \ + FMA3SET(Name, Type##Suffix##_Int) + +#define FMA3_PACKED_GROUP(Name, Suffix) \ + 
FMA3_PACKED_SIZES(Name, PD, Suffix) \ + FMA3_PACKED_SIZES(Name, PS, Suffix) + +#define FMA3_FULL_GROUP(Name, Suffix) \ + FMA3_PACKED_GROUP(Name, Suffix) \ + FMA3_SCALAR_SIZES(Name, SD, Suffix) \ + FMA3_SCALAR_SIZES(Name, SS, Suffix) + +#define FMA3_OPCODES(Suffix) \ + FMA3_FULL_GROUP(VFMADD, Suffix) \ + FMA3_PACKED_GROUP(VFMADDSUB, Suffix) \ + FMA3_FULL_GROUP(VFMSUB, Suffix) \ + FMA3_PACKED_GROUP(VFMSUBADD, Suffix) \ + FMA3_FULL_GROUP(VFNMADD, Suffix) \ + FMA3_FULL_GROUP(VFNMSUB, Suffix) + +// All of the simple unmasked register opcodes. +static const X86FMA3Group FMA3RegOpcodes[] = { + FMA3_OPCODES(r) }; -/// This class provides information about all existing FMA3 opcodes -/// -class X86InstrFMA3Info { -private: - /// A map that is used to find the group of FMA opcodes using any FMA opcode - /// from the group. - DenseMap OpcodeToGroup; - - /// Creates groups of FMA opcodes and initializes Opcode-to-Group map. - /// This method can be called many times, but the actual initialization is - /// called only once. - static void initGroupsOnce(); - - /// Creates groups of FMA opcodes and initializes Opcode-to-Group map. - /// This method must be called ONLY from initGroupsOnce(). Otherwise, such - /// call is not thread safe. - void initGroupsOnceImpl(); - - /// Creates one group of FMA opcodes having the register opcodes - /// \p RegOpcodes and memory opcodes \p MemOpcodes. The parameter \p Attr - /// specifies the attributes describing the created group. - void initRMGroup(const uint16_t *RegOpcodes, - const uint16_t *MemOpcodes, unsigned Attr = 0); - - /// Creates one group of FMA opcodes having only the register opcodes - /// \p RegOpcodes. The parameter \p Attr specifies the attributes describing - /// the created group. - void initRGroup(const uint16_t *RegOpcodes, unsigned Attr = 0); - - /// Creates one group of FMA opcodes having only the memory opcodes - /// \p MemOpcodes. The parameter \p Attr specifies the attributes describing - /// the created group. 
- void initMGroup(const uint16_t *MemOpcodes, unsigned Attr = 0); - -public: - /// Returns the reference to an object of this class. It is assumed that - /// only one object may exist. - static X86InstrFMA3Info *getX86InstrFMA3Info(); - - /// Constructor. Just creates an object of the class. - X86InstrFMA3Info() {} - - /// Destructor. Deallocates the memory used for FMA3 Groups. - ~X86InstrFMA3Info() { - std::set DeletedGroups; - auto E = OpcodeToGroup.end(); - for (auto I = OpcodeToGroup.begin(); I != E; I++) { - const X86InstrFMA3Group *G = I->second; - if (DeletedGroups.find(G) == DeletedGroups.end()) { - DeletedGroups.insert(G); - delete G; - } - } - } - - /// Returns a reference to a group of FMA3 opcodes to where the given - /// \p Opcode is included. If the given \p Opcode is not recognized as FMA3 - /// and not included into any FMA3 group, then nullptr is returned. - static const X86InstrFMA3Group *getFMA3Group(unsigned Opcode) { - // Ensure that the groups of opcodes are initialized. - initGroupsOnce(); - - // Find the group including the given opcode. - const X86InstrFMA3Info *FMA3Info = getX86InstrFMA3Info(); - auto I = FMA3Info->OpcodeToGroup.find(Opcode); - if (I == FMA3Info->OpcodeToGroup.end()) - return nullptr; - - return I->second; - } - - /// Returns true iff the given \p Opcode is recognized as FMA3 by this class. - static bool isFMA3(unsigned Opcode) { - return getFMA3Group(Opcode) != nullptr; - } - - /// Iterator that is used to walk on FMA register opcodes having memory - /// form equivalents. - class rm_iterator { - private: - /// Iterator associated with the OpcodeToGroup map. It must always be - /// initialized with an entry from OpcodeToGroup for which I->first - /// points to a register FMA opcode and I->second points to a group of - /// FMA opcodes having memory form equivalent of I->first. - DenseMap::const_iterator I; - - public: - /// Constructor. Creates rm_iterator. 
The parameter \p I must be an - /// iterator to OpcodeToGroup map entry having I->first pointing to - /// register form FMA opcode and I->second pointing to a group of FMA - /// opcodes holding memory form equivalent for I->fist. - rm_iterator(DenseMap::const_iterator I) - : I(I) {} - - /// Returns the register form FMA opcode. - unsigned getRegOpcode() const { return I->first; }; - - /// Returns the memory form equivalent opcode for FMA register opcode - /// referenced by I->first. - unsigned getMemOpcode() const { - unsigned Opcode = I->first; - const X86InstrFMA3Group *Group = I->second; - return Group->getMemOpcode(Opcode); - } - - /// Returns a reference to a group of FMA opcodes. - const X86InstrFMA3Group *getGroup() const { return I->second; } +// All of the simple unmasked memory opcodes. +static const X86FMA3Group FMA3MemOpcodes[] = { + FMA3_OPCODES(m) +}; - bool operator==(const rm_iterator &OtherIt) const { return I == OtherIt.I; } - bool operator!=(const rm_iterator &OtherIt) const { return I != OtherIt.I; } +// All of the simple masked register opcodes. +static const X86FMA3Group FMA3RegMaskedOpcodes[] = { + FMA3_OPCODES_MASKED(r) +}; - /// Increment. Advances the 'I' iterator to the next OpcodeToGroup entry - /// having I->first pointing to register form FMA and I->second pointing - /// to a group of FMA opcodes holding memory form equivalen for I->first. - rm_iterator &operator++() { - auto E = getX86InstrFMA3Info()->OpcodeToGroup.end(); - for (++I; I != E; ++I) { - unsigned RegOpcode = I->first; - const X86InstrFMA3Group *Group = I->second; - if (Group->getMemOpcode(RegOpcode) != 0) - break; - } - return *this; - } - }; +// All of the simple masked memory opcodes. +static const X86FMA3Group FMA3MemMaskedOpcodes[] = { + FMA3_OPCODES_MASKED(m) +}; - /// Returns rm_iterator pointing to the first entry of OpcodeToGroup map - /// with a register FMA opcode having memory form opcode equivalent. 
- static rm_iterator rm_begin() { - initGroupsOnce(); - const X86InstrFMA3Info *FMA3Info = getX86InstrFMA3Info(); - auto I = FMA3Info->OpcodeToGroup.begin(); - auto E = FMA3Info->OpcodeToGroup.end(); - while (I != E) { - unsigned Opcode = I->first; - const X86InstrFMA3Group *G = I->second; - if (G->getMemOpcode(Opcode) != 0) - break; - I++; - } - return rm_iterator(I); - } +// All of the opcodes with builtin rounding control. +static const X86FMA3Group FMA3RoundOpcodes[] = { + FMA3_FULL_GROUP_ROUND(VFMADD, rb) \ + FMA3_PACKED_GROUP_ROUND(VFMADDSUB, rb) \ + FMA3_FULL_GROUP_ROUND(VFMSUB, rb) \ + FMA3_PACKED_GROUP_ROUND(VFMSUBADD, rb) \ + FMA3_FULL_GROUP_ROUND(VFNMADD, rb) \ + FMA3_FULL_GROUP_ROUND(VFNMSUB, rb) +}; - /// Returns the last rm_iterator. - static rm_iterator rm_end() { - initGroupsOnce(); - return rm_iterator(getX86InstrFMA3Info()->OpcodeToGroup.end()); - } +// All of the broadcast opcodes. +static const X86FMA3Group FMA3BroadcastOpcodes[] = { + FMA3_FULL_GROUP_AVX512(VFMADD, mb) \ + FMA3_PACKED_GROUP_AVX512(VFMADDSUB, mb) \ + FMA3_FULL_GROUP_AVX512(VFMSUB, mb) \ + FMA3_PACKED_GROUP_AVX512(VFMSUBADD, mb) \ + FMA3_FULL_GROUP_AVX512(VFNMADD, mb) \ + FMA3_FULL_GROUP_AVX512(VFNMSUB, mb) }; +static const X86FMA3Group *getFMA3Group(unsigned Opcode, uint64_t TSFlags) { + if (!X86II::isFMA3(TSFlags)) + return nullptr; + + bool IsMem = X86II::getMemoryOperandNo(TSFlags) != -1; + + // Determine which array we need to search based on a few attributes. 
+ ArrayRef Groups; + if (TSFlags & X86II::EVEX_B) { + if (IsMem) + Groups = FMA3BroadcastOpcodes; + else + Groups = FMA3RoundOpcodes; + } else if (X86II::isKMasked(TSFlags)) { + if (IsMem) + Groups = FMA3MemMaskedOpcodes; + else + Groups = FMA3RegMaskedOpcodes; + } else { + if (IsMem) + Groups = FMA3MemOpcodes; + else + Groups = FMA3RegOpcodes; + } + + unsigned FMA3Form = X86II::getFMA3Form(TSFlags); + + auto I = std::lower_bound(Groups.begin(), Groups.end(), + Opcode, + [&](const X86FMA3Group &Group, unsigned Opcode) { + return Group.Opcodes[FMA3Form] < Opcode; + }); + assert(I != Groups.end() && I->Opcodes[FMA3Form] == Opcode && + "Couldn't find FMA3 opcode!"); + return I; +} + +static void verifyFMA3Tables() { + assert((array_lengthof(FMA3RegOpcodes) == array_lengthof(FMA3MemOpcodes)) && + (array_lengthof(FMA3RegMaskedOpcodes) == + array_lengthof(FMA3MemMaskedOpcodes)) && + "FMA3 reg and mem opcodes tables should be the same size"); + assert(std::is_sorted(std::begin(FMA3RegOpcodes), std::end(FMA3RegOpcodes)) && + std::is_sorted(std::begin(FMA3RegMaskedOpcodes), + std::end(FMA3RegMaskedOpcodes)) && + std::is_sorted(std::begin(FMA3RoundOpcodes), + std::end(FMA3RoundOpcodes)) && + std::is_sorted(std::begin(FMA3MemOpcodes), std::end(FMA3MemOpcodes)) && + std::is_sorted(std::begin(FMA3MemMaskedOpcodes), + std::end(FMA3MemMaskedOpcodes)) && + std::is_sorted(std::begin(FMA3BroadcastOpcodes), + std::end(FMA3BroadcastOpcodes)) && + "FMA3 arrays should be sorted by opcode!"); +} + #endif Index: lib/Target/X86/X86InstrFMA3Info.cpp =================================================================== --- lib/Target/X86/X86InstrFMA3Info.cpp +++ /dev/null @@ -1,284 +0,0 @@ -//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the classes providing information -// about existing X86 FMA3 opcodes, classifying and grouping them. -// -//===----------------------------------------------------------------------===// - -#include "X86InstrFMA3Info.h" -#include "X86InstrInfo.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Threading.h" - -/// This flag is used in the method llvm::call_once() used below to make the -/// initialization of the map 'OpcodeToGroup' thread safe. -LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag); - -static ManagedStatic X86InstrFMA3InfoObj; -X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() { - return &*X86InstrFMA3InfoObj; -} - -void X86InstrFMA3Info::initRMGroup(const uint16_t *RegOpcodes, - const uint16_t *MemOpcodes, unsigned Attr) { - // Create a new instance of this class that would hold a group of FMA opcodes. - X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, MemOpcodes, Attr); - - // Add the references from indvidual opcodes to the group holding them. - assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] && - !OpcodeToGroup[RegOpcodes[2]] && !OpcodeToGroup[MemOpcodes[0]] && - !OpcodeToGroup[MemOpcodes[1]] && !OpcodeToGroup[MemOpcodes[2]]) && - "Duplication or rewrite of elements in OpcodeToGroup."); - OpcodeToGroup[RegOpcodes[0]] = G; - OpcodeToGroup[RegOpcodes[1]] = G; - OpcodeToGroup[RegOpcodes[2]] = G; - OpcodeToGroup[MemOpcodes[0]] = G; - OpcodeToGroup[MemOpcodes[1]] = G; - OpcodeToGroup[MemOpcodes[2]] = G; -} - -void X86InstrFMA3Info::initRGroup(const uint16_t *RegOpcodes, unsigned Attr) { - // Create a new instance of this class that would hold a group of FMA opcodes. - X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, nullptr, Attr); - - // Add the references from indvidual opcodes to the group holding them. 
- assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] && - !OpcodeToGroup[RegOpcodes[2]]) && - "Duplication or rewrite of elements in OpcodeToGroup."); - OpcodeToGroup[RegOpcodes[0]] = G; - OpcodeToGroup[RegOpcodes[1]] = G; - OpcodeToGroup[RegOpcodes[2]] = G; -} - -void X86InstrFMA3Info::initMGroup(const uint16_t *MemOpcodes, unsigned Attr) { - // Create a new instance of this class that would hold a group of FMA opcodes. - X86InstrFMA3Group *G = new X86InstrFMA3Group(nullptr, MemOpcodes, Attr); - - // Add the references from indvidual opcodes to the group holding them. - assert((!OpcodeToGroup[MemOpcodes[0]] && !OpcodeToGroup[MemOpcodes[1]] && - !OpcodeToGroup[MemOpcodes[2]]) && - "Duplication or rewrite of elements in OpcodeToGroup."); - OpcodeToGroup[MemOpcodes[0]] = G; - OpcodeToGroup[MemOpcodes[1]] = G; - OpcodeToGroup[MemOpcodes[2]] = G; -} - -#define FMA3RM(R132, R213, R231, M132, M213, M231) \ - static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ - static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \ - initRMGroup(Reg##R132, Mem##R132); - -#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \ - static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ - static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \ - initRMGroup(Reg##R132, Mem##R132, (Attrs)); - -#define FMA3R(R132, R213, R231) \ - static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ - initRGroup(Reg##R132); - -#define FMA3RA(R132, R213, R231, Attrs) \ - static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \ - initRGroup(Reg##R132, (Attrs)); - -#define FMA3M(M132, M213, M231) \ - static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \ - initMGroup(Mem##M132); - -#define FMA3MA(M132, M213, M231, Attrs) \ - static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \ - initMGroup(Mem##M132, (Attrs)); - -#define 
FMA3_AVX2_VECTOR_GROUP(Name) \ - FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \ - Name##132PSm, Name##213PSm, Name##231PSm); \ - FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \ - Name##132PDm, Name##213PDm, Name##231PDm); \ - FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \ - Name##132PSYm, Name##213PSYm, Name##231PSYm); \ - FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \ - Name##132PDYm, Name##213PDYm, Name##231PDYm); - -#define FMA3_AVX2_SCALAR_GROUP(Name) \ - FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \ - Name##132SSm, Name##213SSm, Name##231SSm); \ - FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \ - Name##132SDm, Name##213SDm, Name##231SDm); \ - FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \ - Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \ - X86InstrFMA3Group::X86FMA3Intrinsic); \ - FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \ - Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \ - X86InstrFMA3Group::X86FMA3Intrinsic); - -#define FMA3_AVX2_FULL_GROUP(Name) \ - FMA3_AVX2_VECTOR_GROUP(Name); \ - FMA3_AVX2_SCALAR_GROUP(Name); - -#define FMA3_AVX512_VECTOR_GROUP(Name) \ - FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \ - Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m); \ - FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \ - Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m); \ - FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \ - Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m); \ - FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \ - Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m); \ - FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \ - Name##132PSZm, Name##213PSZm, Name##231PSZm); \ - FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \ - Name##132PDZm, Name##213PDZm, Name##231PDZm); \ - FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \ - Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, 
\ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \ - Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \ - Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \ - Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \ - Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \ - Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \ - Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \ - Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \ - Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \ - Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \ - Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \ - Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb); \ - 
FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb); \ - FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb); \ - FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb); \ - FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb); \ - FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb); \ - FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb); \ - FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb); \ - FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \ - 
X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \ - X86InstrFMA3Group::X86FMA3KZeroMasked); - -#define FMA3_AVX512_SCALAR_GROUP(Name) \ - FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \ - Name##132SSZm, Name##213SSZm, Name##231SSZm); \ - FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \ - Name##132SDZm, Name##213SDZm, Name##231SDZm); \ - FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \ - Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \ - X86InstrFMA3Group::X86FMA3Intrinsic); \ - FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \ - Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \ - X86InstrFMA3Group::X86FMA3Intrinsic); \ - FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \ - Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \ - Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \ - Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \ - Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \ - X86InstrFMA3Group::X86FMA3Intrinsic); \ - FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \ - X86InstrFMA3Group::X86FMA3Intrinsic); \ - 
FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KMergeMasked); \ - FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KZeroMasked); \ - FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \ - X86InstrFMA3Group::X86FMA3Intrinsic | \ - X86InstrFMA3Group::X86FMA3KZeroMasked); - -#define FMA3_AVX512_FULL_GROUP(Name) \ - FMA3_AVX512_VECTOR_GROUP(Name); \ - FMA3_AVX512_SCALAR_GROUP(Name); - -void X86InstrFMA3Info::initGroupsOnceImpl() { - FMA3_AVX2_FULL_GROUP(VFMADD); - FMA3_AVX2_FULL_GROUP(VFMSUB); - FMA3_AVX2_FULL_GROUP(VFNMADD); - FMA3_AVX2_FULL_GROUP(VFNMSUB); - - FMA3_AVX2_VECTOR_GROUP(VFMADDSUB); - FMA3_AVX2_VECTOR_GROUP(VFMSUBADD); - - FMA3_AVX512_FULL_GROUP(VFMADD); - FMA3_AVX512_FULL_GROUP(VFMSUB); - FMA3_AVX512_FULL_GROUP(VFNMADD); - FMA3_AVX512_FULL_GROUP(VFNMSUB); - - FMA3_AVX512_VECTOR_GROUP(VFMADDSUB); - FMA3_AVX512_VECTOR_GROUP(VFMSUBADD); -} - -void X86InstrFMA3Info::initGroupsOnce() { - llvm::call_once(InitGroupsOnceFlag, - []() { getX86InstrFMA3Info()->initGroupsOnceImpl(); }); -} Index: lib/Target/X86/X86InstrFormats.td =================================================================== --- lib/Target/X86/X86InstrFormats.td +++ lib/Target/X86/X86InstrFormats.td @@ -170,6 +170,16 @@ def AdSize32 : AddressSize<2>; // Encodes a 32-bit address. def AdSize64 : AddressSize<3>; // Encodes a 64-bit address. +// FMA3Format - This specifies what form this FMA3 instruction is. This is used +// for FMA3 commuting. 
+class FMA3Format<bits<2> val> { + bits<2> Value = val; +} +def NotFMA3 : FMA3Format<0>; +def FMA3_132 : FMA3Format<1>; +def FMA3_213 : FMA3Format<2>; +def FMA3_231 : FMA3Format<3>; + // Prefix byte classes which are used to indicate to the ad-hoc machine code // emitter that various prefix bytes are required. class OpSize16 { OperandSize OpSize = OpSize16; } @@ -212,6 +222,9 @@ class EVEX_V512 { bit hasEVEX_L2 = 1; bit hasVEX_L = 0; } class EVEX_V256 { bit hasEVEX_L2 = 0; bit hasVEX_L = 1; } class EVEX_V128 { bit hasEVEX_L2 = 0; bit hasVEX_L = 0; } +class FMA3_132 { FMA3Format FMA3Form = FMA3_132; } +class FMA3_213 { FMA3Format FMA3Form = FMA3_213; } +class FMA3_231 { FMA3Format FMA3Form = FMA3_231; } // Specify AVX512 8-bit compressed displacement encoding based on the vector // element size in bits (8, 16, 32, 64) and the CDisp8 form. @@ -284,6 +297,8 @@ int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes. bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding? bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction. + FMA3Format FMA3Form = NotFMA3; // What flavor of FMA3 is this? + bit FMA3Intrinsic = 0; // Is this an FMA3 scalar intrinsic opcode. 
bits<2> EVEX_LL; let EVEX_LL{0} = hasVEX_L; @@ -328,6 +343,8 @@ let TSFlags{52-46} = CD8_Scale; let TSFlags{53} = has3DNow0F0FOpcode; let TSFlags{54} = hasEVEX_RC; + let TSFlags{56-55} = FMA3Form.Value; + let TSFlags{57} = FMA3Intrinsic; } class PseudoI pattern> Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -15,7 +15,6 @@ #define LLVM_LIB_TARGET_X86_X86INSTRINFO_H #include "MCTargetDesc/X86BaseInfo.h" -#include "X86InstrFMA3Info.h" #include "X86RegisterInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Target/TargetInstrInfo.h" @@ -139,6 +138,14 @@ MI.getOperand(Op + X86::AddrSegmentReg).isReg() && isLeaMem(MI, Op); } +struct X86FMA3Group { + uint16_t Opcodes[3]; + + bool operator<(const X86FMA3Group &RHS) const { + return Opcodes[0] < RHS.Opcodes[0]; + } +}; + class X86InstrInfo final : public X86GenInstrInfo { X86Subtarget &Subtarget; const X86RegisterInfo RI; @@ -276,7 +283,7 @@ /// operand index is not set and this method is free to pick any of /// available commutable operands. /// The parameter \p FMA3Group keeps the reference to the group of relative - /// FMA3 opcodes including register/memory forms of 132/213/231 opcodes. + /// FMA3 opcodes. /// /// For example, calling this method this way: /// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex; @@ -292,7 +299,7 @@ bool findFMA3CommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, - const X86InstrFMA3Group &FMA3Group) const; + const X86FMA3Group &FMA3Group) const; /// Returns an adjusted FMA opcode that must be used in FMA instruction that /// performs the same computations as the given \p MI but which has the @@ -312,7 +319,7 @@ unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, - const X86InstrFMA3Group &FMA3Group) const; + const X86FMA3Group &FMA3Group) const; // Branch analysis. 
bool isUnpredicatedTerminator(const MachineInstr &MI) const override; Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -14,6 +14,7 @@ #include "X86InstrInfo.h" #include "X86.h" #include "X86InstrBuilder.h" +#include "X86InstrFMA3Info.h" #include "X86MachineFunctionInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" @@ -121,6 +122,8 @@ (STI.is64Bit() ? X86::RETQ : X86::RETL)), Subtarget(STI), RI(STI.getTargetTriple()) { + verifyFMA3Tables(); + static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, { X86::ADC32ri8, X86::ADC32mi8, 0 }, @@ -2019,13 +2022,16 @@ // Index 3, folded load Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } - auto I = X86InstrFMA3Info::rm_begin(); - auto E = X86InstrFMA3Info::rm_end(); - for (; I != E; ++I) - if (!I.getGroup()->isKMasked()) + + // Add FMA3 instructions. + for (size_t i = 0; i != array_lengthof(FMA3RegOpcodes); ++i) { + for (size_t j = 0; j != 3; ++j) { AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, - I.getRegOpcode(), I.getMemOpcode(), + FMA3RegOpcodes[i].Opcodes[j], FMA3MemOpcodes[i].Opcodes[j], + // Index 3, folded load TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD); + } + } static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { // AVX-512 foldable instructions @@ -2135,11 +2141,17 @@ // Index 4, folded load Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } - for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) - if (I.getGroup()->isKMasked()) + + // Add FMA3 instructions. 
+ for (size_t i = 0; i != array_lengthof(FMA3RegMaskedOpcodes); ++i) { + for (size_t j = 0; j != 3; ++j) { AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, - I.getRegOpcode(), I.getMemOpcode(), + FMA3RegMaskedOpcodes[i].Opcodes[j], + FMA3MemMaskedOpcodes[i].Opcodes[j], + // Index 4, folded load TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD); + } + } } void @@ -3204,9 +3216,9 @@ unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands( const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2, - const X86InstrFMA3Group &FMA3Group) const { + const X86FMA3Group &FMA3Group) const { - unsigned Opc = MI.getOpcode(); + uint64_t TSFlags = MI.getDesc().TSFlags; // Put the lowest index to SrcOpIdx1 to simplify the checks below. if (SrcOpIdx1 > SrcOpIdx2) @@ -3218,11 +3230,11 @@ // not implemented yet. So, just return 0 in that case. // When such analysis are available this place will be the right place for // calling it. - if (FMA3Group.isIntrinsic() && SrcOpIdx1 == 1) + if (X86II::isFMA3Intrinsic(TSFlags) && SrcOpIdx1 == 1) return 0; unsigned FMAOp1 = 1, FMAOp2 = 2, FMAOp3 = 3; - if (FMA3Group.isKMasked()) { + if (X86II::isKMasked(TSFlags)) { // The k-mask operand cannot be commuted. if (SrcOpIdx1 == 2) return 0; @@ -3240,7 +3252,7 @@ // : v1[i]; // VMOVAPSZmrk , k, v4; // this is the ONLY user of v4 -> // // Ok, to commute v1 in FMADD213PSZrk. - if (FMA3Group.isKMergeMasked() && SrcOpIdx1 == FMAOp1) + if (X86II::isKMergeMasked(TSFlags) && SrcOpIdx1 == FMAOp1) return 0; FMAOp2++; FMAOp3++; @@ -3259,45 +3271,27 @@ // Define the FMA forms mapping array that helps to map input FMA form // to output FMA form to preserve the operation semantics after // commuting the operands. 
- const unsigned Form132Index = 0; - const unsigned Form213Index = 1; - const unsigned Form231Index = 2; static const unsigned FormMapping[][3] = { // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2; // FMA132 A, C, b; ==> FMA231 C, A, b; // FMA213 B, A, c; ==> FMA213 A, B, c; // FMA231 C, A, b; ==> FMA132 A, C, b; - { Form231Index, Form213Index, Form132Index }, + { X86::FMA3Form231, X86::FMA3Form213, X86::FMA3Form132 }, // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3; // FMA132 A, c, B; ==> FMA132 B, c, A; // FMA213 B, a, C; ==> FMA231 C, a, B; // FMA231 C, a, B; ==> FMA213 B, a, C; - { Form132Index, Form231Index, Form213Index }, + { X86::FMA3Form132, X86::FMA3Form231, X86::FMA3Form213 }, // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3; // FMA132 a, C, B; ==> FMA213 a, B, C; // FMA213 b, A, C; ==> FMA132 b, C, A; // FMA231 c, A, B; ==> FMA231 c, B, A; - { Form213Index, Form132Index, Form231Index } + { X86::FMA3Form213, X86::FMA3Form132, X86::FMA3Form231 } }; - unsigned FMAForms[3]; - if (FMA3Group.isRegOpcodeFromGroup(Opc)) { - FMAForms[0] = FMA3Group.getReg132Opcode(); - FMAForms[1] = FMA3Group.getReg213Opcode(); - FMAForms[2] = FMA3Group.getReg231Opcode(); - } else { - FMAForms[0] = FMA3Group.getMem132Opcode(); - FMAForms[1] = FMA3Group.getMem213Opcode(); - FMAForms[2] = FMA3Group.getMem231Opcode(); - } - unsigned FormIndex; - for (FormIndex = 0; FormIndex < 3; FormIndex++) - if (Opc == FMAForms[FormIndex]) - break; - // Everything is ready, just adjust the FMA opcode and return it. 
- FormIndex = FormMapping[Case][FormIndex]; - return FMAForms[FormIndex]; + unsigned FormIndex = FormMapping[Case][X86II::getFMA3Form(TSFlags)]; + return FMA3Group.Opcodes[FormIndex]; } MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, @@ -3522,9 +3516,8 @@ OpIdx1, OpIdx2); } default: - const X86InstrFMA3Group *FMA3Group = - X86InstrFMA3Info::getFMA3Group(MI.getOpcode()); - if (FMA3Group) { + if (const X86FMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(), + MI.getDesc().TSFlags)) { unsigned Opc = getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group); if (Opc == 0) @@ -3539,19 +3532,22 @@ } } -bool X86InstrInfo::findFMA3CommutedOpIndices( - const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2, - const X86InstrFMA3Group &FMA3Group) const { +bool +X86InstrInfo::findFMA3CommutedOpIndices(const MachineInstr &MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2, + const X86FMA3Group &FMA3Group) const { + uint64_t TSFlags = MI.getDesc().TSFlags; unsigned FirstCommutableVecOp = 1; unsigned LastCommutableVecOp = 3; unsigned KMaskOp = 0; - if (FMA3Group.isKMasked()) { + if (TSFlags & X86II::EVEX_K) { // The k-mask operand has index = 2 for masked and zero-masked operations. KMaskOp = 2; // The operand with index = 1 is used as a source for those elements for // which the corresponding bit in the k-mask is set to 0. - if (FMA3Group.isKMergeMasked()) + if (!(TSFlags & X86II::EVEX_Z)) FirstCommutableVecOp = 3; LastCommutableVecOp++; @@ -3648,9 +3644,8 @@ return false; } default: - const X86InstrFMA3Group *FMA3Group = - X86InstrFMA3Info::getFMA3Group(MI.getOpcode()); - if (FMA3Group) + if (const X86FMA3Group *FMA3Group = getFMA3Group(MI.getOpcode(), + MI.getDesc().TSFlags)) return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, *FMA3Group); return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2); }