Index: bindings/go/llvm/DIBuilderBindings.cpp =================================================================== --- bindings/go/llvm/DIBuilderBindings.cpp +++ bindings/go/llvm/DIBuilderBindings.cpp @@ -19,8 +19,6 @@ using namespace llvm; -DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef) - LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) { Module *m = unwrap(mref); return wrap(new DIBuilder(*m)); Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -12290,6 +12290,7 @@ assumed. This argument must be one of the following strings: :: + "round.dynamic" "round.tonearest" "round.downward" @@ -12321,6 +12322,7 @@ strings: :: + "fpexcept.ignore" "fpexcept.maytrap" "fpexcept.strict" Index: include/llvm-c/DebugInfo.h =================================================================== --- /dev/null +++ include/llvm-c/DebugInfo.h @@ -0,0 +1,121 @@ +//===------------ DebugInfo.h - LLVM C API Debug Info API -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the C API endpoints for generating DWARF Debug Info +// +// Note: This interface is experimental. It is *NOT* stable, and may be +// changed without warning. +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/// Debug info flags. +typedef enum { +#define HANDLE_DI_FLAG(ID, NAME) LLVMDIFlag##NAME = ID, + #include "llvm/IR/DebugInfoFlags.def" + LLVMDIFlagAccessibility = LLVMDIFlagPrivate + | LLVMDIFlagProtected + | LLVMDIFlagPublic +} LLVMDIFlags; + +/// Source languages known by DWARF. +typedef enum { +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) LLVMDWARFSourceLanguage##NAME = ID, + #include "llvm/Support/Dwarf.def" +} LLVMDWARFSourceLanguage; + +/// The amount of debug information to emit. +typedef enum { + LLVMDWARFEmissionNone = 0, + LLVMDWARFEmissionFull, + LLVMDWARFEmissionLineTablesOnly +} LLVMDWARFEmissionKind; + +/// The current debug metadata version number. +uint32_t LLVMDebugMetadataVersion(); + +/// The version of debug metadata that's present in the provided \c Module. +unsigned LLVMGetModuleDebugMetadataVersion(LLVMModuleRef Module); + +/// \brief Strip debug info in the module if it exists. +/// +/// To do this, we remove all calls to the debugger intrinsics and any named +/// metadata for debugging. We also remove debug locations for instructions. +/// Return true if module is modified. +uint8_t LLVMStripModuleDebugInfo(LLVMModuleRef Module); + +/// Construct a builder for a module. +/// +/// If \c AllowUnresolved, collect unresolved nodes attached to the module +/// in order to resolve cycles during a call to \c LLVMDIBuilderFinalize. +LLVMDIBuilderRef LLVMDIBuilderCreate(LLVMModuleRef M, uint8_t AllowUnresolved); + +/// Deallocates the DIBuilder and everything it owns. +void LLVMDIBuilderDispose(LLVMDIBuilderRef Builder); + +/// Construct any deferred debug info descriptors. +void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder); + +/// A CompileUnit provides an anchor for all debugging +/// information generated during this instance of compilation. +/// \param Lang Source programming language, eg. +/// \c LLVMDWARFSourceLanguageC99 +/// \param File File info. 
+/// \param Producer Identify the producer of debugging information +/// and code. Usually this is a compiler +/// version string. +/// \param isOptimized A boolean flag which indicates whether optimization +/// is enabled or not. +/// \param Flags This string lists command line options. This +/// string is directly embedded in debug info +/// output which may be used by a tool +/// analyzing generated debugging information. +/// \param RuntimeVer This indicates runtime version for languages like +/// Objective-C. +/// \param SplitName The name of the file that we'll split debug info +/// out into. +/// \param Kind The kind of debug information to generate. +/// \param DWOId The DWOId if this is a split skeleton compile unit. +/// \param SplitDebugInlining Whether to emit inline debug info. +/// \param DebugInfoForProfiling Whether to emit extra debug info for +/// profile collection. +LLVMMetadataRef LLVMDIBuilderCreateCompileUnit( + LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang, + LLVMMetadataRef FileRef, const char *Producer, uint8_t isOptimized, + const char *Flags, unsigned RuntimeVer, const char *SplitName, + LLVMDWARFEmissionKind Kind, uint64_t DWOId, uint8_t SplitDebugInlining, + uint8_t DebugInfoForProfiling); + +/// Create a file descriptor to hold debugging information for a file. +/// \param Builder The DIBuilder. +/// \param Filename File name. +/// \param Directory Directory. +LLVMMetadataRef +LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename, + const char *Directory); + +/// Creates a new DebugLocation that describes a source location. +/// \param Line The line in the source file. +/// \param Column The column in the source file. +/// \param Scope The scope in which the location resides. +/// \param InlinedAt The scope where this location was inlined, if at all. +/// (optional). +LLVMMetadataRef +LLVMDIBuilderCreateDebugLocation(LLVMContextRef Ctx, unsigned Line, + unsigned Column, LLVMMetadataRef Scope, + LLVMMetadataRef InlinedAt); + +#ifdef __cplusplus +} // end extern "C" +#endif Index: include/llvm-c/Types.h =================================================================== --- include/llvm-c/Types.h +++ include/llvm-c/Types.h @@ -97,6 +97,13 @@ typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; /** + * Represents an LLVM debug info builder. + * + * This models llvm::DIBuilder. + */ +typedef struct LLVMOpaqueDIBuilder *LLVMDIBuilderRef; + +/** * Interface used to provide a module to JIT or interpreter. * This is now just a synonym for llvm::Module, but we have to keep using the * different type to keep binary compatibility. Index: include/llvm/ADT/APFloat.h =================================================================== --- include/llvm/ADT/APFloat.h +++ include/llvm/ADT/APFloat.h @@ -397,6 +397,12 @@ /// consider inserting before falling back to scientific /// notation. 0 means to always use scientific notation. /// + /// \param TruncateZero Indicate whether to remove the trailing zero in + /// fraction part or not. Also setting this parameter to false forcing + /// producing of output more similar to default printf behavior. + /// Specifically the lower e is used as exponent delimiter and exponent + /// always contains no less than two digits. 
+ /// /// Number Precision MaxPadding Result /// ------ --------- ---------- ------ /// 1.01E+4 5 2 10100 @@ -406,7 +412,7 @@ /// 1.01E-2 4 2 0.0101 /// 1.01E-2 4 1 1.01E-2 void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const; + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const; /// If this value has an exact multiplicative inverse, store it in inv and /// return true. @@ -649,7 +655,7 @@ bool isInteger() const; void toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const; + unsigned FormatMaxPadding, bool TruncateZero = true) const; bool getExactInverse(APFloat *inv) const; @@ -1144,9 +1150,9 @@ APFloat &operator=(APFloat &&RHS) = default; void toString(SmallVectorImpl &Str, unsigned FormatPrecision = 0, - unsigned FormatMaxPadding = 3) const { + unsigned FormatMaxPadding = 3, bool TruncateZero = true) const { APFLOAT_DISPATCH_ON_SEMANTICS( - toString(Str, FormatPrecision, FormatMaxPadding)); + toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero)); } void print(raw_ostream &) const; Index: include/llvm/ADT/APInt.h =================================================================== --- include/llvm/ADT/APInt.h +++ include/llvm/ADT/APInt.h @@ -212,6 +212,9 @@ /// out-of-line slow case for intersects. bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY; + /// out-of-line slow case for isSubsetOf. + bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY; + /// out-of-line slow case for setBits. void setBitsSlowCase(unsigned loBit, unsigned hiBit); @@ -227,6 +230,14 @@ /// out-of-line slow case for operator^=. void XorAssignSlowCase(const APInt& RHS); + /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compare(const APInt &RHS) const LLVM_READONLY; + + /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal + /// to, or greater than RHS. + int compareSigned(const APInt &RHS) const LLVM_READONLY; + public: /// \name Constructors /// @{ @@ -413,10 +424,10 @@ return countPopulationSlowCase() == 1; } - /// \brief Check if the APInt's value is returned by getSignBit. + /// \brief Check if the APInt's value is returned by getSignMask. /// - /// \returns true if this is the value returned by getSignBit. - bool isSignBit() const { return isMinSignedValue(); } + /// \returns true if this is the value returned by getSignMask. + bool isSignMask() const { return isMinSignedValue(); } /// \brief Convert APInt to a boolean value. /// @@ -494,11 +505,11 @@ return API; } - /// \brief Get the SignBit for a specific bit width. + /// \brief Get the SignMask for a specific bit width. /// /// This is just a wrapper function of getSignedMinValue(), and it helps code - /// readability when we want to get a SignBit. - static APInt getSignBit(unsigned BitWidth) { + /// readability when we want to get a SignMask. + static APInt getSignMask(unsigned BitWidth) { return getSignedMinValue(BitWidth); } @@ -1076,7 +1087,7 @@ /// the validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered unsigned. - bool ult(const APInt &RHS) const LLVM_READONLY; + bool ult(const APInt &RHS) const { return compare(RHS) < 0; } /// \brief Unsigned less than comparison /// @@ -1095,7 +1106,7 @@ /// validity of the less-than relationship. /// /// \returns true if *this < RHS when both are considered signed. 
- bool slt(const APInt &RHS) const LLVM_READONLY; + bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } /// \brief Signed less than comparison /// @@ -1114,7 +1125,7 @@ /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered unsigned. - bool ule(const APInt &RHS) const { return ult(RHS) || eq(RHS); } + bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } /// \brief Unsigned less or equal comparison /// @@ -1130,7 +1141,7 @@ /// validity of the less-or-equal relationship. /// /// \returns true if *this <= RHS when both are considered signed. - bool sle(const APInt &RHS) const { return slt(RHS) || eq(RHS); } + bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } /// \brief Signed less or equal comparison /// @@ -1146,7 +1157,7 @@ /// the validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered unsigned. - bool ugt(const APInt &RHS) const { return !ult(RHS) && !eq(RHS); } + bool ugt(const APInt &RHS) const { return !ule(RHS); } /// \brief Unsigned greater than comparison /// @@ -1165,7 +1176,7 @@ /// validity of the greater-than relationship. /// /// \returns true if *this > RHS when both are considered signed. - bool sgt(const APInt &RHS) const { return !slt(RHS) && !eq(RHS); } + bool sgt(const APInt &RHS) const { return !sle(RHS); } /// \brief Signed greater than comparison /// @@ -1219,6 +1230,14 @@ return intersectsSlowCase(RHS); } + /// This operation checks that all bits set in this APInt are also set in RHS. + bool isSubsetOf(const APInt &RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return (VAL & ~RHS.VAL) == 0; + return isSubsetOfSlowCase(RHS); + } + /// @} /// \name Resizing Operators /// @{ Index: include/llvm/ADT/BitVector.h =================================================================== --- include/llvm/ADT/BitVector.h +++ include/llvm/ADT/BitVector.h @@ -14,6 +14,7 @@ #ifndef LLVM_ADT_BITVECTOR_H #define LLVM_ADT_BITVECTOR_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/MathExtras.h" #include #include @@ -161,6 +162,22 @@ return -1; } + /// find_last - Returns the index of the last set bit, -1 if none of the bits + /// are set. + int find_last() const { + if (Size == 0) + return -1; + + unsigned N = NumBitWords(size()); + assert(N > 0); + + unsigned i = N - 1; + while (i > 0 && Bits[i] == BitWord(0)) + --i; + + return int((i + 1) * BITWORD_SIZE - countLeadingZeros(Bits[i])) - 1; + } + /// find_first_unset - Returns the index of the first unset bit, -1 if all /// of the bits are set. int find_first_unset() const { @@ -172,6 +189,30 @@ return -1; } + /// find_last_unset - Returns the index of the last unset bit, -1 if all of + /// the bits are set. + int find_last_unset() const { + if (Size == 0) + return -1; + + const unsigned N = NumBitWords(size()); + assert(N > 0); + + unsigned i = N - 1; + BitWord W = Bits[i]; + + // The last word in the BitVector has some unused bits, so we need to set + // them all to 1 first. Set them all to 1 so they don't get treated as + // valid unset bits. + unsigned UnusedCount = BITWORD_SIZE - size() % BITWORD_SIZE; + W |= maskLeadingOnes(UnusedCount); + + while (W == ~BitWord(0) && --i > 0) + W = Bits[i]; + + return int((i + 1) * BITWORD_SIZE - countLeadingOnes(W)) - 1; + } + /// find_next - Returns the index of the next set bit following the /// "Prev" bit. Returns -1 if the next set bit is not found. 
int find_next(unsigned Prev) const { @@ -455,6 +496,105 @@ return *this; } + BitVector &operator>>=(unsigned N) { + assert(N <= Size); + if (LLVM_UNLIKELY(empty() || N == 0)) + return *this; + + unsigned NumWords = NumBitWords(Size); + assert(NumWords >= 1); + + wordShr(N / BITWORD_SIZE); + + unsigned BitDistance = N % BITWORD_SIZE; + if (BitDistance == 0) + return *this; + + // When the shift size is not a multiple of the word size, then we have + // a tricky situation where each word in succession needs to extract some + // of the bits from the next word and or them into this word while + // shifting this word to make room for the new bits. This has to be done + // for every word in the array. + + // Since we're shifting each word right, some bits will fall off the end + // of each word to the right, and empty space will be created on the left. + // The final word in the array will lose bits permanently, so starting at + // the beginning, work forwards shifting each word to the right, and + // OR'ing in the bits from the end of the next word to the beginning of + // the current word. + + // Example: + // Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting right + // by 4 bits. + // Step 1: Word[0] >>= 4 ; 0x0ABBCCDD + // Step 2: Word[0] |= 0x10000000 ; 0x1ABBCCDD + // Step 3: Word[1] >>= 4 ; 0x0EEFF001 + // Step 4: Word[1] |= 0x50000000 ; 0x5EEFF001 + // Step 5: Word[2] >>= 4 ; 0x02334455 + // Result: { 0x1ABBCCDD, 0x5EEFF001, 0x02334455 } + const BitWord Mask = maskTrailingOnes(BitDistance); + const unsigned LSH = BITWORD_SIZE - BitDistance; + + for (unsigned I = 0; I < NumWords - 1; ++I) { + Bits[I] >>= BitDistance; + Bits[I] |= (Bits[I + 1] & Mask) << LSH; + } + + Bits[NumWords - 1] >>= BitDistance; + + return *this; + } + + BitVector &operator<<=(unsigned N) { + assert(N <= Size); + if (LLVM_UNLIKELY(empty() || N == 0)) + return *this; + + unsigned NumWords = NumBitWords(Size); + assert(NumWords >= 1); + + wordShl(N / BITWORD_SIZE); + + unsigned BitDistance = N % BITWORD_SIZE; + if (BitDistance == 0) + return *this; + + // When the shift size is not a multiple of the word size, then we have + // a tricky situation where each word in succession needs to extract some + // of the bits from the previous word and or them into this word while + // shifting this word to make room for the new bits. This has to be done + // for every word in the array. This is similar to the algorithm outlined + // in operator>>=, but backwards. + + // Since we're shifting each word left, some bits will fall off the end + // of each word to the left, and empty space will be created on the right. + // The first word in the array will lose bits permanently, so starting at + // the end, work backwards shifting each word to the left, and OR'ing + // in the bits from the end of the next word to the beginning of the + // current word. + + // Example: + // Starting with {0xAABBCCDD, 0xEEFF0011, 0x22334455} and shifting left + // by 4 bits. 
+ // Step 1: Word[2] <<= 4 ; 0x23344550 + // Step 2: Word[2] |= 0x0000000E ; 0x2334455E + // Step 3: Word[1] <<= 4 ; 0xEFF00110 + // Step 4: Word[1] |= 0x0000000A ; 0xEFF0011A + // Step 5: Word[0] <<= 4 ; 0xABBCCDD0 + // Result: { 0xABBCCDD0, 0xEFF0011A, 0x2334455E } + const BitWord Mask = maskLeadingOnes(BitDistance); + const unsigned RSH = BITWORD_SIZE - BitDistance; + + for (int I = NumWords - 1; I > 0; --I) { + Bits[I] <<= BitDistance; + Bits[I] |= (Bits[I - 1] & Mask) >> RSH; + } + Bits[0] <<= BitDistance; + clear_unused_bits(); + + return *this; + } + // Assignment operator. const BitVector &operator=(const BitVector &RHS) { if (this == &RHS) return *this; @@ -538,6 +678,54 @@ } private: + /// \brief Perform a logical left shift of \p Count words by moving everything + /// \p Count words to the right in memory. + /// + /// While confusing, words are stored from least significant at Bits[0] to + /// most significant at Bits[NumWords-1]. A logical shift left, however, + /// moves the current least significant bit to a higher logical index, and + /// fills the previous least significant bits with 0. Thus, we actually + /// need to move the bytes of the memory to the right, not to the left. + /// Example: + /// Words = [0xBBBBAAAA, 0xDDDDFFFF, 0x00000000, 0xDDDD0000] + /// represents a BitVector where 0xBBBBAAAA contain the least significant + /// bits. So if we want to shift the BitVector left by 2 words, we need to + /// turn this into 0x00000000 0x00000000 0xBBBBAAAA 0xDDDDFFFF by using a + /// memmove which moves right, not left. + void wordShl(uint32_t Count) { + if (Count == 0) + return; + + uint32_t NumWords = NumBitWords(Size); + + auto Src = ArrayRef(Bits, NumWords).drop_back(Count); + auto Dest = MutableArrayRef(Bits, NumWords).drop_front(Count); + + // Since we always move Word-sized chunks of data with src and dest both + // aligned to a word-boundary, we don't need to worry about endianness + // here. + std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); + std::memset(Bits, 0, Count * sizeof(BitWord)); + clear_unused_bits(); + } + + /// \brief Perform a logical right shift of \p Count words by moving those + /// words to the left in memory. See wordShl for more information. + /// + void wordShr(uint32_t Count) { + if (Count == 0) + return; + + uint32_t NumWords = NumBitWords(Size); + + auto Src = ArrayRef(Bits, NumWords).drop_front(Count); + auto Dest = MutableArrayRef(Bits, NumWords).drop_back(Count); + assert(Dest.size() == Src.size()); + + std::memmove(Dest.begin(), Src.begin(), Dest.size() * sizeof(BitWord)); + std::memset(Dest.end(), 0, Count * sizeof(BitWord)); + } + int next_unset_in_word(int WordIndex, BitWord Word) const { unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word); return Result < size() ? Result : -1; Index: include/llvm/ADT/SmallBitVector.h =================================================================== --- include/llvm/ADT/SmallBitVector.h +++ include/llvm/ADT/SmallBitVector.h @@ -117,9 +117,7 @@ } // Return the size. 
- size_t getSmallSize() const { - return getSmallRawBits() >> SmallNumDataBits; - } + size_t getSmallSize() const { return getSmallRawBits() >> SmallNumDataBits; } void setSmallSize(size_t Size) { setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits)); @@ -216,6 +214,16 @@ return getPointer()->find_first(); } + int find_last() const { + if (isSmall()) { + uintptr_t Bits = getSmallBits(); + if (Bits == 0) + return -1; + return NumBaseBits - countLeadingZeros(Bits); + } + return getPointer()->find_last(); + } + /// Returns the index of the first unset bit, -1 if all of the bits are set. int find_first_unset() const { if (isSmall()) { @@ -228,6 +236,17 @@ return getPointer()->find_first_unset(); } + int find_last_unset() const { + if (isSmall()) { + if (count() == getSmallSize()) + return -1; + + uintptr_t Bits = getSmallBits(); + return NumBaseBits - countLeadingOnes(Bits); + } + return getPointer()->find_last_unset(); + } + /// Returns the index of the next set bit following the "Prev" bit. /// Returns -1 if the next set bit is not found. int find_next(unsigned Prev) const { @@ -508,6 +527,22 @@ return *this; } + SmallBitVector &operator<<=(unsigned N) { + if (isSmall()) + setSmallBits(getSmallBits() << N); + else + getPointer()->operator<<=(N); + return *this; + } + + SmallBitVector &operator>>=(unsigned N) { + if (isSmall()) + setSmallBits(getSmallBits() >> N); + else + getPointer()->operator>>=(N); + return *this; + } + // Assignment operator. const SmallBitVector &operator=(const SmallBitVector &RHS) { if (isSmall()) { Index: include/llvm/CodeGen/GlobalISel/InstructionSelector.h =================================================================== --- include/llvm/CodeGen/GlobalISel/InstructionSelector.h +++ include/llvm/CodeGen/GlobalISel/InstructionSelector.h @@ -18,20 +18,50 @@ #include "llvm/ADT/Optional.h" #include +#include namespace llvm { class MachineInstr; +class MachineFunction; class MachineOperand; class MachineRegisterInfo; class RegisterBankInfo; class TargetInstrInfo; class TargetRegisterInfo; +/// Container class for CodeGen predicate results. +/// This is convenient because std::bitset does not have a constructor +/// with an initializer list of set bits. +/// +/// Each InstructionSelector subclass should define a PredicateBitset class with: +/// const unsigned MAX_SUBTARGET_PREDICATES = 192; +/// using PredicateBitset = PredicateBitsetImpl; +/// and updating the constant to suit the target. Tablegen provides a suitable +/// definition for the predicates in use in GenGlobalISel.inc when +/// GET_GLOBALISEL_PREDICATE_BITSET is defined. +template +class PredicateBitsetImpl : public std::bitset { +public: + // Cannot inherit constructors because it's not supported by VC++.. + PredicateBitsetImpl() = default; + + PredicateBitsetImpl(const std::bitset &B) + : std::bitset(B) {} + + PredicateBitsetImpl(std::initializer_list Init) { + for (auto I : Init) + std::bitset::set(I); + } +}; + /// Provides the logic to select generic machine instructions. class InstructionSelector { public: virtual ~InstructionSelector() {} + /// This is executed before selecting a function. + virtual void beginFunction(const MachineFunction &MF) {} + /// Select the (possibly generic) instruction \p I to only use target-specific /// opcodes. It is OK to insert multiple instructions, but they cannot be /// generic pre-isel instructions. 
Index: include/llvm/CodeGen/MachineValueType.h =================================================================== --- include/llvm/CodeGen/MachineValueType.h +++ include/llvm/CodeGen/MachineValueType.h @@ -28,155 +28,246 @@ /// type can be represented by an MVT. class MVT { public: - enum SimpleValueType : int8_t { - // Simple value types less than zero are considered extended value types. - INVALID_SIMPLE_VALUE_TYPE = -1, + enum SimpleValueType : uint8_t { + // Simple value types that aren't explicitly part of this enumeration + // are considered extended value types. + INVALID_SIMPLE_VALUE_TYPE = 0, // If you change this numbering, you must change the values in // ValueTypes.td as well! - Other = 0, // This is a non-standard value - i1 = 1, // This is a 1 bit integer value - i8 = 2, // This is an 8 bit integer value - i16 = 3, // This is a 16 bit integer value - i32 = 4, // This is a 32 bit integer value - i64 = 5, // This is a 64 bit integer value - i128 = 6, // This is a 128 bit integer value + Other = 1, // This is a non-standard value + i1 = 2, // This is a 1 bit integer value + i8 = 3, // This is an 8 bit integer value + i16 = 4, // This is a 16 bit integer value + i32 = 5, // This is a 32 bit integer value + i64 = 6, // This is a 64 bit integer value + i128 = 7, // This is a 128 bit integer value FIRST_INTEGER_VALUETYPE = i1, LAST_INTEGER_VALUETYPE = i128, - f16 = 7, // This is a 16 bit floating point value - f32 = 8, // This is a 32 bit floating point value - f64 = 9, // This is a 64 bit floating point value - f80 = 10, // This is a 80 bit floating point value - f128 = 11, // This is a 128 bit floating point value - ppcf128 = 12, // This is a PPC 128-bit floating point value + f16 = 8, // This is a 16 bit floating point value + f32 = 9, // This is a 32 bit floating point value + f64 = 10, // This is a 64 bit floating point value + f80 = 11, // This is a 80 bit floating point value + f128 = 12, // This is a 128 bit floating point value + ppcf128 = 13, // This is a PPC 128-bit floating point value FIRST_FP_VALUETYPE = f16, LAST_FP_VALUETYPE = ppcf128, - v2i1 = 13, // 2 x i1 - v4i1 = 14, // 4 x i1 - v8i1 = 15, // 8 x i1 - v16i1 = 16, // 16 x i1 - v32i1 = 17, // 32 x i1 - v64i1 = 18, // 64 x i1 - v512i1 = 19, // 512 x i1 - v1024i1 = 20, // 1024 x i1 - - v1i8 = 21, // 1 x i8 - v2i8 = 22, // 2 x i8 - v4i8 = 23, // 4 x i8 - v8i8 = 24, // 8 x i8 - v16i8 = 25, // 16 x i8 - v32i8 = 26, // 32 x i8 - v64i8 = 27, // 64 x i8 - v128i8 = 28, //128 x i8 - v256i8 = 29, //256 x i8 - - v1i16 = 30, // 1 x i16 - v2i16 = 31, // 2 x i16 - v4i16 = 32, // 4 x i16 - v8i16 = 33, // 8 x i16 - v16i16 = 34, // 16 x i16 - v32i16 = 35, // 32 x i16 - v64i16 = 36, // 64 x i16 - v128i16 = 37, //128 x i16 - - v1i32 = 38, // 1 x i32 - v2i32 = 39, // 2 x i32 - v4i32 = 40, // 4 x i32 - v8i32 = 41, // 8 x i32 - v16i32 = 42, // 16 x i32 - v32i32 = 43, // 32 x i32 - v64i32 = 44, // 64 x i32 - - v1i64 = 45, // 1 x i64 - v2i64 = 46, // 2 x i64 - v4i64 = 47, // 4 x i64 - v8i64 = 48, // 8 x i64 - v16i64 = 49, // 16 x i64 - v32i64 = 50, // 32 x i64 - - v1i128 = 51, // 1 x i128 + v2i1 = 14, // 2 x i1 + v4i1 = 15, // 4 x i1 + v8i1 = 16, // 8 x i1 + v16i1 = 17, // 16 x i1 + v32i1 = 18, // 32 x i1 + v64i1 = 19, // 64 x i1 + v512i1 = 20, // 512 x i1 + v1024i1 = 21, // 1024 x i1 + + v1i8 = 22, // 1 x i8 + v2i8 = 23, // 2 x i8 + v4i8 = 24, // 4 x i8 + v8i8 = 25, // 8 x i8 + v16i8 = 26, // 16 x i8 + v32i8 = 27, // 32 x i8 + v64i8 = 28, // 64 x i8 + v128i8 = 29, //128 x i8 + v256i8 = 30, //256 x i8 + + v1i16 = 31, // 1 x i16 + v2i16 = 
32, // 2 x i16 + v4i16 = 33, // 4 x i16 + v8i16 = 34, // 8 x i16 + v16i16 = 35, // 16 x i16 + v32i16 = 36, // 32 x i16 + v64i16 = 37, // 64 x i16 + v128i16 = 38, //128 x i16 + + v1i32 = 39, // 1 x i32 + v2i32 = 40, // 2 x i32 + v4i32 = 41, // 4 x i32 + v8i32 = 42, // 8 x i32 + v16i32 = 43, // 16 x i32 + v32i32 = 44, // 32 x i32 + v64i32 = 45, // 64 x i32 + + v1i64 = 46, // 1 x i64 + v2i64 = 47, // 2 x i64 + v4i64 = 48, // 4 x i64 + v8i64 = 49, // 8 x i64 + v16i64 = 50, // 16 x i64 + v32i64 = 51, // 32 x i64 + + v1i128 = 52, // 1 x i128 + + // Scalable integer types + nxv2i1 = 53, // n x 2 x i1 + nxv4i1 = 54, // n x 4 x i1 + nxv8i1 = 55, // n x 8 x i1 + nxv16i1 = 56, // n x 16 x i1 + nxv32i1 = 57, // n x 32 x i1 + + nxv1i8 = 58, // n x 1 x i8 + nxv2i8 = 59, // n x 2 x i8 + nxv4i8 = 60, // n x 4 x i8 + nxv8i8 = 61, // n x 8 x i8 + nxv16i8 = 62, // n x 16 x i8 + nxv32i8 = 63, // n x 32 x i8 + + nxv1i16 = 64, // n x 1 x i16 + nxv2i16 = 65, // n x 2 x i16 + nxv4i16 = 66, // n x 4 x i16 + nxv8i16 = 67, // n x 8 x i16 + nxv16i16 = 68, // n x 16 x i16 + nxv32i16 = 69, // n x 32 x i16 + + nxv1i32 = 70, // n x 1 x i32 + nxv2i32 = 71, // n x 2 x i32 + nxv4i32 = 72, // n x 4 x i32 + nxv8i32 = 73, // n x 8 x i32 + nxv16i32 = 74, // n x 16 x i32 + nxv32i32 = 75, // n x 32 x i32 + + nxv1i64 = 76, // n x 1 x i64 + nxv2i64 = 77, // n x 2 x i64 + nxv4i64 = 78, // n x 4 x i64 + nxv8i64 = 79, // n x 8 x i64 + nxv16i64 = 80, // n x 16 x i64 + nxv32i64 = 81, // n x 32 x i64 FIRST_INTEGER_VECTOR_VALUETYPE = v2i1, - LAST_INTEGER_VECTOR_VALUETYPE = v1i128, - - v2f16 = 52, // 2 x f16 - v4f16 = 53, // 4 x f16 - v8f16 = 54, // 8 x f16 - v1f32 = 55, // 1 x f32 - v2f32 = 56, // 2 x f32 - v4f32 = 57, // 4 x f32 - v8f32 = 58, // 8 x f32 - v16f32 = 59, // 16 x f32 - v1f64 = 60, // 1 x f64 - v2f64 = 61, // 2 x f64 - v4f64 = 62, // 4 x f64 - v8f64 = 63, // 8 x f64 + LAST_INTEGER_VECTOR_VALUETYPE = nxv32i64, + + FIRST_INTEGER_SCALABLE_VALUETYPE = nxv2i1, + LAST_INTEGER_SCALABLE_VALUETYPE = nxv32i64, + + v2f16 = 82, // 2 x f16 + v4f16 = 83, // 4 x f16 + v8f16 = 84, // 8 x f16 + v1f32 = 85, // 1 x f32 + v2f32 = 86, // 2 x f32 + v4f32 = 87, // 4 x f32 + v8f32 = 88, // 8 x f32 + v16f32 = 89, // 16 x f32 + v1f64 = 90, // 1 x f64 + v2f64 = 91, // 2 x f64 + v4f64 = 92, // 4 x f64 + v8f64 = 93, // 8 x f64 + + nxv2f16 = 94, // n x 2 x f16 + nxv4f16 = 95, // n x 4 x f16 + nxv8f16 = 96, // n x 8 x f16 + nxv1f32 = 97, // n x 1 x f32 + nxv2f32 = 98, // n x 2 x f32 + nxv4f32 = 99, // n x 4 x f32 + nxv8f32 = 100, // n x 8 x f32 + nxv16f32 = 101, // n x 16 x f32 + nxv1f64 = 102, // n x 1 x f64 + nxv2f64 = 103, // n x 2 x f64 + nxv4f64 = 104, // n x 4 x f64 + nxv8f64 = 105, // n x 8 x f64 FIRST_FP_VECTOR_VALUETYPE = v2f16, - LAST_FP_VECTOR_VALUETYPE = v8f64, + LAST_FP_VECTOR_VALUETYPE = nxv8f64, + + FIRST_FP_SCALABLE_VALUETYPE = nxv2f16, + LAST_FP_SCALABLE_VALUETYPE = nxv8f64, FIRST_VECTOR_VALUETYPE = v2i1, - LAST_VECTOR_VALUETYPE = v8f64, + LAST_VECTOR_VALUETYPE = nxv8f64, - x86mmx = 64, // This is an X86 MMX value + x86mmx = 106, // This is an X86 MMX value - Glue = 65, // This glues nodes together during pre-RA sched + Glue = 107, // This glues nodes together during pre-RA sched - isVoid = 66, // This has no value + isVoid = 108, // This has no value - Untyped = 67, // This value takes a register, but has - // unspecified type. The register class - // will be determined by the opcode. + Untyped = 109, // This value takes a register, but has + // unspecified type. The register class + // will be determined by the opcode. 
- FIRST_VALUETYPE = 0, // This is always the beginning of the list. - LAST_VALUETYPE = 68, // This always remains at the end of the list. + FIRST_VALUETYPE = 1, // This is always the beginning of the list. + LAST_VALUETYPE = 110, // This always remains at the end of the list. // This is the current maximum for LAST_VALUETYPE. // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors // This value must be a multiple of 32. - MAX_ALLOWED_VALUETYPE = 96, + MAX_ALLOWED_VALUETYPE = 128, // A value of type llvm::TokenTy - token = 120, + token = 248, // This is MDNode or MDString. - Metadata = 121, + Metadata = 249, // An int value the size of the pointer of the current // target to any address space. This must only be used internal to // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. - iPTRAny = 122, + iPTRAny = 250, // A vector with any length and element size. This is used // for intrinsics that have overloadings based on vector types. // This is only for tblgen's consumption! - vAny = 123, + vAny = 251, // Any floating-point or vector floating-point value. This is used // for intrinsics that have overloadings based on floating-point types. // This is only for tblgen's consumption! - fAny = 124, + fAny = 252, // An integer or vector integer value of any bit width. This is // used for intrinsics that have overloadings based on integer bit widths. // This is only for tblgen's consumption! - iAny = 125, + iAny = 253, // An int value the size of the pointer of the current // target. This should only be used internal to tblgen! - iPTR = 126, + iPTR = 254, // Any type. This is used for intrinsics that have overloadings. // This is only for tblgen's consumption! - Any = 127 + Any = 255 }; SimpleValueType SimpleTy; + + // A class to represent the number of elements in a vector + // + // For fixed-length vectors, the total number of elements is equal to 'Min' + // For scalable vectors, the total number of elements is a multiple of 'Min' + class ElementCount { + public: + unsigned Min; + bool Scalable; + + ElementCount(unsigned Min, bool Scalable) + : Min(Min), Scalable(Scalable) {} + + ElementCount operator*(unsigned RHS) { + return { Min * RHS, Scalable }; + } + + ElementCount& operator*=(unsigned RHS) { + Min *= RHS; + return *this; + } + + ElementCount operator/(unsigned RHS) { + return { Min / RHS, Scalable }; + } + + ElementCount& operator/=(unsigned RHS) { + Min /= RHS; + return *this; + } + + bool operator==(const ElementCount& RHS) { + return Min == RHS.Min && Scalable == RHS.Scalable; + } + }; + constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {} constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {} @@ -221,6 +312,15 @@ SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); } + /// Return true if this is a vector value type where the + /// runtime length is machine dependent + bool isScalableVector() const { + return ((SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VALUETYPE) || + (SimpleTy >= MVT::FIRST_FP_SCALABLE_VALUETYPE && + SimpleTy <= MVT::LAST_FP_SCALABLE_VALUETYPE)); + } + /// Return true if this is a 16-bit vector type. 
bool is16BitVector() const { return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 || @@ -318,7 +418,12 @@ case v32i1: case v64i1: case v512i1: - case v1024i1: return i1; + case v1024i1: + case nxv2i1: + case nxv4i1: + case nxv8i1: + case nxv16i1: + case nxv32i1: return i1; case v1i8: case v2i8: case v4i8: @@ -327,7 +432,13 @@ case v32i8: case v64i8: case v128i8: - case v256i8: return i8; + case v256i8: + case nxv1i8: + case nxv2i8: + case nxv4i8: + case nxv8i8: + case nxv16i8: + case nxv32i8: return i8; case v1i16: case v2i16: case v4i16: @@ -335,33 +446,63 @@ case v16i16: case v32i16: case v64i16: - case v128i16: return i16; + case v128i16: + case nxv1i16: + case nxv2i16: + case nxv4i16: + case nxv8i16: + case nxv16i16: + case nxv32i16: return i16; case v1i32: case v2i32: case v4i32: case v8i32: case v16i32: case v32i32: - case v64i32: return i32; + case v64i32: + case nxv1i32: + case nxv2i32: + case nxv4i32: + case nxv8i32: + case nxv16i32: + case nxv32i32: return i32; case v1i64: case v2i64: case v4i64: case v8i64: case v16i64: - case v32i64: return i64; + case v32i64: + case nxv1i64: + case nxv2i64: + case nxv4i64: + case nxv8i64: + case nxv16i64: + case nxv32i64: return i64; case v1i128: return i128; case v2f16: case v4f16: - case v8f16: return f16; + case v8f16: + case nxv2f16: + case nxv4f16: + case nxv8f16: return f16; case v1f32: case v2f32: case v4f32: case v8f32: - case v16f32: return f32; + case v16f32: + case nxv1f32: + case nxv2f32: + case nxv4f32: + case nxv8f32: + case nxv16f32: return f32; case v1f64: case v2f64: case v4f64: - case v8f64: return f64; + case v8f64: + case nxv1f64: + case nxv2f64: + case nxv4f64: + case nxv8f64: return f64; } } @@ -382,13 +523,24 @@ case v32i8: case v32i16: case v32i32: - case v32i64: return 32; + case v32i64: + case nxv32i1: + case nxv32i8: + case nxv32i16: + case nxv32i32: + case nxv32i64: return 32; case v16i1: case v16i8: case v16i16: case v16i32: case v16i64: - case v16f32: return 16; + case v16f32: + case nxv16i1: + case nxv16i8: + case nxv16i16: + case nxv16i32: + case nxv16i64: + case nxv16f32: return 16; case v8i1: case v8i8: case v8i16: @@ -396,7 +548,15 @@ case v8i64: case v8f16: case v8f32: - case v8f64: return 8; + case v8f64: + case nxv8i1: + case nxv8i8: + case nxv8i16: + case nxv8i32: + case nxv8i64: + case nxv8f16: + case nxv8f32: + case nxv8f64: return 8; case v4i1: case v4i8: case v4i16: @@ -404,7 +564,15 @@ case v4i64: case v4f16: case v4f32: - case v4f64: return 4; + case v4f64: + case nxv4i1: + case nxv4i8: + case nxv4i16: + case nxv4i32: + case nxv4i64: + case nxv4f16: + case nxv4f32: + case nxv4f64: return 4; case v2i1: case v2i8: case v2i16: @@ -412,17 +580,35 @@ case v2i64: case v2f16: case v2f32: - case v2f64: return 2; + case v2f64: + case nxv2i1: + case nxv2i8: + case nxv2i16: + case nxv2i32: + case nxv2i64: + case nxv2f16: + case nxv2f32: + case nxv2f64: return 2; case v1i8: case v1i16: case v1i32: case v1i64: case v1i128: case v1f32: - case v1f64: return 1; + case v1f64: + case nxv1i8: + case nxv1i16: + case nxv1i32: + case nxv1i64: + case nxv1f32: + case nxv1f64: return 1; } } + MVT::ElementCount getVectorElementCount() const { + return { getVectorNumElements(), isScalableVector() }; + } + unsigned getSizeInBits() const { switch (SimpleTy) { default: @@ -443,16 +629,23 @@ case Metadata: llvm_unreachable("Value type is metadata."); case i1 : return 1; - case v2i1: return 2; - case v4i1: return 4; + case v2i1: + case nxv2i1: return 2; + case v4i1: + case nxv4i1: return 4; case i8 : case v1i8: - case v8i1: 
return 8; + case v8i1: + case nxv1i8: + case nxv8i1: return 8; case i16 : case f16: case v16i1: case v2i8: - case v1i16: return 16; + case v1i16: + case nxv16i1: + case nxv2i8: + case nxv1i16: return 16; case f32 : case i32 : case v32i1: @@ -460,7 +653,13 @@ case v2i16: case v2f16: case v1f32: - case v1i32: return 32; + case v1i32: + case nxv32i1: + case nxv4i8: + case nxv2i16: + case nxv1i32: + case nxv2f16: + case nxv1f32: return 32; case x86mmx: case f64 : case i64 : @@ -471,7 +670,14 @@ case v1i64: case v4f16: case v2f32: - case v1f64: return 64; + case v1f64: + case nxv8i8: + case nxv4i16: + case nxv2i32: + case nxv1i64: + case nxv4f16: + case nxv2f32: + case nxv1f64: return 64; case f80 : return 80; case f128: case ppcf128: @@ -483,29 +689,50 @@ case v1i128: case v8f16: case v4f32: - case v2f64: return 128; + case v2f64: + case nxv16i8: + case nxv8i16: + case nxv4i32: + case nxv2i64: + case nxv8f16: + case nxv4f32: + case nxv2f64: return 128; case v32i8: case v16i16: case v8i32: case v4i64: case v8f32: - case v4f64: return 256; + case v4f64: + case nxv32i8: + case nxv16i16: + case nxv8i32: + case nxv4i64: + case nxv8f32: + case nxv4f64: return 256; case v512i1: case v64i8: case v32i16: case v16i32: case v8i64: case v16f32: - case v8f64: return 512; + case v8f64: + case nxv32i16: + case nxv16i32: + case nxv8i64: + case nxv16f32: + case nxv8f64: return 512; case v1024i1: case v128i8: case v64i16: case v32i32: - case v16i64: return 1024; + case v16i64: + case nxv32i32: + case nxv16i64: return 1024; case v256i8: case v128i16: case v64i32: - case v32i64: return 2048; + case v32i64: + case nxv32i64: return 2048; } } @@ -659,6 +886,83 @@ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); } + static MVT getScalableVectorVT(MVT VT, unsigned NumElements) { + switch(VT.SimpleTy) { + default: + break; + case MVT::i1: + if (NumElements == 2) return MVT::nxv2i1; + if (NumElements == 4) return MVT::nxv4i1; + if (NumElements == 8) return MVT::nxv8i1; + if (NumElements == 16) return MVT::nxv16i1; + if (NumElements == 32) return MVT::nxv32i1; + break; + case MVT::i8: + if (NumElements == 1) return MVT::nxv1i8; + if (NumElements == 2) return MVT::nxv2i8; + if (NumElements == 4) return MVT::nxv4i8; + if (NumElements == 8) return MVT::nxv8i8; + if (NumElements == 16) return MVT::nxv16i8; + if (NumElements == 32) return MVT::nxv32i8; + break; + case MVT::i16: + if (NumElements == 1) return MVT::nxv1i16; + if (NumElements == 2) return MVT::nxv2i16; + if (NumElements == 4) return MVT::nxv4i16; + if (NumElements == 8) return MVT::nxv8i16; + if (NumElements == 16) return MVT::nxv16i16; + if (NumElements == 32) return MVT::nxv32i16; + break; + case MVT::i32: + if (NumElements == 1) return MVT::nxv1i32; + if (NumElements == 2) return MVT::nxv2i32; + if (NumElements == 4) return MVT::nxv4i32; + if (NumElements == 8) return MVT::nxv8i32; + if (NumElements == 16) return MVT::nxv16i32; + if (NumElements == 32) return MVT::nxv32i32; + break; + case MVT::i64: + if (NumElements == 1) return MVT::nxv1i64; + if (NumElements == 2) return MVT::nxv2i64; + if (NumElements == 4) return MVT::nxv4i64; + if (NumElements == 8) return MVT::nxv8i64; + if (NumElements == 16) return MVT::nxv16i64; + if (NumElements == 32) return MVT::nxv32i64; + break; + case MVT::f16: + if (NumElements == 2) return MVT::nxv2f16; + if (NumElements == 4) return MVT::nxv4f16; + if (NumElements == 8) return MVT::nxv8f16; + break; + case MVT::f32: + if (NumElements == 1) return MVT::nxv1f32; + if (NumElements == 2) return MVT::nxv2f32; + if 
(NumElements == 4) return MVT::nxv4f32; + if (NumElements == 8) return MVT::nxv8f32; + if (NumElements == 16) return MVT::nxv16f32; + break; + case MVT::f64: + if (NumElements == 1) return MVT::nxv1f64; + if (NumElements == 2) return MVT::nxv2f64; + if (NumElements == 4) return MVT::nxv4f64; + if (NumElements == 8) return MVT::nxv8f64; + break; + } + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + } + + static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) { + if (IsScalable) + return getScalableVectorVT(VT, NumElements); + return getVectorVT(VT, NumElements); + } + + static MVT getVectorVT(MVT VT, MVT::ElementCount EC) { + if (EC.Scalable) + return getScalableVectorVT(VT, EC.Min); + return getVectorVT(VT, EC.Min); + } + /// Return the value type corresponding to the specified type. This returns /// all pointers as iPTR. If HandleUnknown is true, unknown types are /// returned as Other, otherwise they are invalid. @@ -709,6 +1013,14 @@ MVT::FIRST_FP_VECTOR_VALUETYPE, (MVT::SimpleValueType)(MVT::LAST_FP_VECTOR_VALUETYPE + 1)); } + static mvt_range integer_scalable_vector_valuetypes() { + return mvt_range(MVT::FIRST_INTEGER_SCALABLE_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_INTEGER_SCALABLE_VALUETYPE + 1)); + } + static mvt_range fp_scalable_vector_valuetypes() { + return mvt_range(MVT::FIRST_FP_SCALABLE_VALUETYPE, + (MVT::SimpleValueType)(MVT::LAST_FP_SCALABLE_VALUETYPE + 1)); + } /// @} }; Index: include/llvm/CodeGen/ValueTypes.h =================================================================== --- include/llvm/CodeGen/ValueTypes.h +++ include/llvm/CodeGen/ValueTypes.h @@ -44,7 +44,7 @@ bool operator!=(EVT VT) const { if (V.SimpleTy != VT.V.SimpleTy) return true; - if (V.SimpleTy < 0) + if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) return LLVMTy != VT.LLVMTy; return false; } @@ -60,31 +60,48 @@ /// bits. static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { MVT M = MVT::getIntegerVT(BitWidth); - if (M.SimpleTy >= 0) + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; return getExtendedIntegerVT(Context, BitWidth); } /// Returns the EVT that represents a vector NumElements in length, where /// each element is of type VT. - static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) { - MVT M = MVT::getVectorVT(VT.V, NumElements); - if (M.SimpleTy >= 0) + static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, + bool IsScalable = false) { + MVT M = MVT::getVectorVT(VT.V, NumElements, IsScalable); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) return M; + + assert(!IsScalable && "We don't support extended scalable types yet"); return getExtendedVectorVT(Context, VT, NumElements); } + /// Returns the EVT that represents a vector EC.Min elements in length, + /// where each element is of type VT. + static EVT getVectorVT(LLVMContext &Context, EVT VT, MVT::ElementCount EC) { + MVT M = MVT::getVectorVT(VT.V, EC); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + return M; + assert (!EC.Scalable && "We don't support extended scalable types yet"); + return getExtendedVectorVT(Context, VT, EC.Min); + } + /// Return a vector with the same number of elements as this vector, but /// with the element type converted to an integer type with the same /// bitwidth. 
EVT changeVectorElementTypeToInteger() const { - if (!isSimple()) + if (!isSimple()) { + assert (!isScalableVector() && + "We don't support extended scalable types yet"); return changeExtendedVectorElementTypeToInteger(); + } MVT EltTy = getSimpleVT().getVectorElementType(); unsigned BitWidth = EltTy.getSizeInBits(); MVT IntTy = MVT::getIntegerVT(BitWidth); - MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements()); - assert(VecTy.SimpleTy >= 0 && + MVT VecTy = MVT::getVectorVT(IntTy, getVectorNumElements(), + isScalableVector()); + assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE && "Simple vector VT not representable by simple integer vector VT!"); return VecTy; } @@ -104,7 +121,7 @@ /// Test if the given EVT is simple (as opposed to being extended). bool isSimple() const { - return V.SimpleTy >= 0; + return V.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE; } /// Test if the given EVT is extended (as opposed to being simple). @@ -132,6 +149,17 @@ return isSimple() ? V.isVector() : isExtendedVector(); } + /// Return true if this is a vector type where the runtime + /// length is machine dependent + bool isScalableVector() const { + // FIXME: We don't support extended scalable types yet, because the + // matching IR type doesn't exist. Once it has been added, this can + // be changed to call isExtendedScalableVector. + if (!isSimple()) + return false; + return V.isScalableVector(); + } + /// Return true if this is a 16-bit vector type. bool is16BitVector() const { return isSimple() ? V.is16BitVector() : isExtended16BitVector(); @@ -247,6 +275,17 @@ return getExtendedVectorNumElements(); } + // Given a (possibly scalable) vector type, return the ElementCount + MVT::ElementCount getVectorElementCount() const { + assert((isVector()) && "Invalid vector type!"); + if (isSimple()) + return V.getVectorElementCount(); + + assert(!isScalableVector() && + "We don't support extended scalable types yet"); + return {getExtendedVectorNumElements(), false}; + } + /// Return the size of the specified value type in bits. unsigned getSizeInBits() const { if (isSimple()) @@ -301,7 +340,7 @@ EVT widenIntegerVectorElementType(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); EltVT = EVT::getIntegerVT(Context, 2 * EltVT.getSizeInBits()); - return EVT::getVectorVT(Context, EltVT, getVectorNumElements()); + return EVT::getVectorVT(Context, EltVT, getVectorElementCount()); } // Return a VT for a vector type with the same element type but @@ -309,9 +348,8 @@ // extended type. 
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const { EVT EltVT = getVectorElementType(); - auto EltCnt = getVectorNumElements(); - assert(!(getVectorNumElements() & 1) && - "Splitting vector, but not in half!"); + auto EltCnt = getVectorElementCount(); + assert(!(EltCnt.Min & 1) && "Splitting vector, but not in half!"); return EVT::getVectorVT(Context, EltVT, EltCnt / 2); } @@ -327,7 +365,8 @@ if (!isPow2VectorType()) { unsigned NElts = getVectorNumElements(); unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); - return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts); + return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts, + isScalableVector()); } else { return *this; Index: include/llvm/CodeGen/ValueTypes.td =================================================================== --- include/llvm/CodeGen/ValueTypes.td +++ include/llvm/CodeGen/ValueTypes.td @@ -19,101 +19,147 @@ int Value = value; } -def OtherVT: ValueType<0 , 0>; // "Other" value -def i1 : ValueType<1 , 1>; // One bit boolean value -def i8 : ValueType<8 , 2>; // 8-bit integer value -def i16 : ValueType<16 , 3>; // 16-bit integer value -def i32 : ValueType<32 , 4>; // 32-bit integer value -def i64 : ValueType<64 , 5>; // 64-bit integer value -def i128 : ValueType<128, 6>; // 128-bit integer value -def f16 : ValueType<16 , 7>; // 16-bit floating point value -def f32 : ValueType<32 , 8>; // 32-bit floating point value -def f64 : ValueType<64 , 9>; // 64-bit floating point value -def f80 : ValueType<80 , 10>; // 80-bit floating point value -def f128 : ValueType<128, 11>; // 128-bit floating point value -def ppcf128: ValueType<128, 12>; // PPC 128-bit floating point value - -def v2i1 : ValueType<2 , 13>; // 2 x i1 vector value -def v4i1 : ValueType<4 , 14>; // 4 x i1 vector value -def v8i1 : ValueType<8 , 15>; // 8 x i1 vector value -def v16i1 : ValueType<16, 16>; // 16 x i1 vector value -def v32i1 : ValueType<32 , 17>; // 32 x i1 vector value -def v64i1 : ValueType<64 , 18>; // 64 x i1 vector value -def v512i1 : ValueType<512, 19>; // 512 x i1 vector value -def v1024i1: ValueType<1024,20>; //1024 x i1 vector value - -def v1i8 : ValueType<16, 21>; // 1 x i8 vector value -def v2i8 : ValueType<16 , 22>; // 2 x i8 vector value -def v4i8 : ValueType<32 , 23>; // 4 x i8 vector value -def v8i8 : ValueType<64 , 24>; // 8 x i8 vector value -def v16i8 : ValueType<128, 25>; // 16 x i8 vector value -def v32i8 : ValueType<256, 26>; // 32 x i8 vector value -def v64i8 : ValueType<512, 27>; // 64 x i8 vector value -def v128i8 : ValueType<1024,28>; //128 x i8 vector value -def v256i8 : ValueType<2048,29>; //256 x i8 vector value - -def v1i16 : ValueType<16 , 30>; // 1 x i16 vector value -def v2i16 : ValueType<32 , 31>; // 2 x i16 vector value -def v4i16 : ValueType<64 , 32>; // 4 x i16 vector value -def v8i16 : ValueType<128, 33>; // 8 x i16 vector value -def v16i16 : ValueType<256, 34>; // 16 x i16 vector value -def v32i16 : ValueType<512, 35>; // 32 x i16 vector value -def v64i16 : ValueType<1024,36>; // 64 x i16 vector value -def v128i16: ValueType<2048,37>; //128 x i16 vector value - -def v1i32 : ValueType<32 , 38>; // 1 x i32 vector value -def v2i32 : ValueType<64 , 39>; // 2 x i32 vector value -def v4i32 : ValueType<128, 40>; // 4 x i32 vector value -def v8i32 : ValueType<256, 41>; // 8 x i32 vector value -def v16i32 : ValueType<512, 42>; // 16 x i32 vector value -def v32i32 : ValueType<1024,43>; // 32 x i32 vector value -def v64i32 : ValueType<2048,44>; // 32 x i32 vector value - -def v1i64 : ValueType<64 , 
45>; // 1 x i64 vector value -def v2i64 : ValueType<128, 46>; // 2 x i64 vector value -def v4i64 : ValueType<256, 47>; // 4 x i64 vector value -def v8i64 : ValueType<512, 48>; // 8 x i64 vector value -def v16i64 : ValueType<1024,49>; // 16 x i64 vector value -def v32i64 : ValueType<2048,50>; // 32 x i64 vector value - -def v1i128 : ValueType<128, 51>; // 1 x i128 vector value - -def v2f16 : ValueType<32 , 52>; // 2 x f16 vector value -def v4f16 : ValueType<64 , 53>; // 4 x f16 vector value -def v8f16 : ValueType<128, 54>; // 8 x f16 vector value -def v1f32 : ValueType<32 , 55>; // 1 x f32 vector value -def v2f32 : ValueType<64 , 56>; // 2 x f32 vector value -def v4f32 : ValueType<128, 57>; // 4 x f32 vector value -def v8f32 : ValueType<256, 58>; // 8 x f32 vector value -def v16f32 : ValueType<512, 59>; // 16 x f32 vector value -def v1f64 : ValueType<64, 60>; // 1 x f64 vector value -def v2f64 : ValueType<128, 61>; // 2 x f64 vector value -def v4f64 : ValueType<256, 62>; // 4 x f64 vector value -def v8f64 : ValueType<512, 63>; // 8 x f64 vector value - - -def x86mmx : ValueType<64 , 64>; // X86 MMX value -def FlagVT : ValueType<0 , 65>; // Pre-RA sched glue -def isVoid : ValueType<0 , 66>; // Produces no value -def untyped: ValueType<8 , 67>; // Produces an untyped value -def token : ValueType<0 , 120>; // TokenTy -def MetadataVT: ValueType<0, 121>; // Metadata +def OtherVT: ValueType<0 , 1>; // "Other" value +def i1 : ValueType<1 , 2>; // One bit boolean value +def i8 : ValueType<8 , 3>; // 8-bit integer value +def i16 : ValueType<16 , 4>; // 16-bit integer value +def i32 : ValueType<32 , 5>; // 32-bit integer value +def i64 : ValueType<64 , 6>; // 64-bit integer value +def i128 : ValueType<128, 7>; // 128-bit integer value +def f16 : ValueType<16 , 8>; // 16-bit floating point value +def f32 : ValueType<32 , 9>; // 32-bit floating point value +def f64 : ValueType<64 , 10>; // 64-bit floating point value +def f80 : ValueType<80 , 11>; // 80-bit floating point value +def f128 : ValueType<128, 12>; // 128-bit floating point value +def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value + +def v2i1 : ValueType<2 , 14>; // 2 x i1 vector value +def v4i1 : ValueType<4 , 15>; // 4 x i1 vector value +def v8i1 : ValueType<8 , 16>; // 8 x i1 vector value +def v16i1 : ValueType<16, 17>; // 16 x i1 vector value +def v32i1 : ValueType<32 , 18>; // 32 x i1 vector value +def v64i1 : ValueType<64 , 19>; // 64 x i1 vector value +def v512i1 : ValueType<512, 20>; // 512 x i1 vector value +def v1024i1: ValueType<1024,21>; //1024 x i1 vector value + +def v1i8 : ValueType<16, 22>; // 1 x i8 vector value +def v2i8 : ValueType<16 , 23>; // 2 x i8 vector value +def v4i8 : ValueType<32 , 24>; // 4 x i8 vector value +def v8i8 : ValueType<64 , 25>; // 8 x i8 vector value +def v16i8 : ValueType<128, 26>; // 16 x i8 vector value +def v32i8 : ValueType<256, 27>; // 32 x i8 vector value +def v64i8 : ValueType<512, 28>; // 64 x i8 vector value +def v128i8 : ValueType<1024,29>; //128 x i8 vector value +def v256i8 : ValueType<2048,30>; //256 x i8 vector value + +def v1i16 : ValueType<16 , 31>; // 1 x i16 vector value +def v2i16 : ValueType<32 , 32>; // 2 x i16 vector value +def v4i16 : ValueType<64 , 33>; // 4 x i16 vector value +def v8i16 : ValueType<128, 34>; // 8 x i16 vector value +def v16i16 : ValueType<256, 35>; // 16 x i16 vector value +def v32i16 : ValueType<512, 36>; // 32 x i16 vector value +def v64i16 : ValueType<1024,37>; // 64 x i16 vector value +def v128i16: ValueType<2048,38>; //128 x i16 vector 
value + +def v1i32 : ValueType<32 , 39>; // 1 x i32 vector value +def v2i32 : ValueType<64 , 40>; // 2 x i32 vector value +def v4i32 : ValueType<128, 41>; // 4 x i32 vector value +def v8i32 : ValueType<256, 42>; // 8 x i32 vector value +def v16i32 : ValueType<512, 43>; // 16 x i32 vector value +def v32i32 : ValueType<1024,44>; // 32 x i32 vector value +def v64i32 : ValueType<2048,45>; // 32 x i32 vector value + +def v1i64 : ValueType<64 , 46>; // 1 x i64 vector value +def v2i64 : ValueType<128, 47>; // 2 x i64 vector value +def v4i64 : ValueType<256, 48>; // 4 x i64 vector value +def v8i64 : ValueType<512, 49>; // 8 x i64 vector value +def v16i64 : ValueType<1024,50>; // 16 x i64 vector value +def v32i64 : ValueType<2048,51>; // 32 x i64 vector value + +def v1i128 : ValueType<128, 52>; // 1 x i128 vector value + +def nxv2i1 : ValueType<2, 53>; // n x 2 x i1 vector value +def nxv4i1 : ValueType<4, 54>; // n x 4 x i1 vector value +def nxv8i1 : ValueType<8, 55>; // n x 8 x i1 vector value +def nxv16i1 : ValueType<16, 56>; // n x 16 x i1 vector value +def nxv32i1 : ValueType<32, 57>; // n x 32 x i1 vector value + +def nxv1i8 : ValueType<8, 58>; // n x 1 x i8 vector value +def nxv2i8 : ValueType<16, 59>; // n x 2 x i8 vector value +def nxv4i8 : ValueType<32, 60>; // n x 4 x i8 vector value +def nxv8i8 : ValueType<64, 61>; // n x 8 x i8 vector value +def nxv16i8 : ValueType<128, 62>; // n x 16 x i8 vector value +def nxv32i8 : ValueType<256, 63>; // n x 32 x i8 vector value + +def nxv1i16 : ValueType<16, 64>; // n x 1 x i16 vector value +def nxv2i16 : ValueType<32, 65>; // n x 2 x i16 vector value +def nxv4i16 : ValueType<64, 66>; // n x 4 x i16 vector value +def nxv8i16 : ValueType<128, 67>; // n x 8 x i16 vector value +def nxv16i16: ValueType<256, 68>; // n x 16 x i16 vector value +def nxv32i16: ValueType<512, 69>; // n x 32 x i16 vector value + +def nxv1i32 : ValueType<32, 70>; // n x 1 x i32 vector value +def nxv2i32 : ValueType<64, 71>; // n x 2 x i32 vector value +def nxv4i32 : ValueType<128, 72>; // n x 4 x i32 vector value +def nxv8i32 : ValueType<256, 73>; // n x 8 x i32 vector value +def nxv16i32: ValueType<512, 74>; // n x 16 x i32 vector value +def nxv32i32: ValueType<1024,75>; // n x 32 x i32 vector value + +def nxv1i64 : ValueType<64, 76>; // n x 1 x i64 vector value +def nxv2i64 : ValueType<128, 77>; // n x 2 x i64 vector value +def nxv4i64 : ValueType<256, 78>; // n x 4 x i64 vector value +def nxv8i64 : ValueType<512, 79>; // n x 8 x i64 vector value +def nxv16i64: ValueType<1024,80>; // n x 16 x i64 vector value +def nxv32i64: ValueType<2048,81>; // n x 32 x i64 vector value + +def v2f16 : ValueType<32 , 82>; // 2 x f16 vector value +def v4f16 : ValueType<64 , 83>; // 4 x f16 vector value +def v8f16 : ValueType<128, 84>; // 8 x f16 vector value +def v1f32 : ValueType<32 , 85>; // 1 x f32 vector value +def v2f32 : ValueType<64 , 86>; // 2 x f32 vector value +def v4f32 : ValueType<128, 87>; // 4 x f32 vector value +def v8f32 : ValueType<256, 88>; // 8 x f32 vector value +def v16f32 : ValueType<512, 89>; // 16 x f32 vector value +def v1f64 : ValueType<64, 90>; // 1 x f64 vector value +def v2f64 : ValueType<128, 91>; // 2 x f64 vector value +def v4f64 : ValueType<256, 92>; // 4 x f64 vector value +def v8f64 : ValueType<512, 93>; // 8 x f64 vector value + +def nxv2f16 : ValueType<32 , 94>; // n x 2 x f16 vector value +def nxv4f16 : ValueType<64 , 95>; // n x 4 x f16 vector value +def nxv8f16 : ValueType<128, 96>; // n x 8 x f16 vector value +def nxv1f32 : ValueType<32 , 97>; // n x 1 
x f32 vector value +def nxv2f32 : ValueType<64 , 98>; // n x 2 x f32 vector value +def nxv4f32 : ValueType<128, 99>; // n x 4 x f32 vector value +def nxv8f32 : ValueType<256, 100>; // n x 8 x f32 vector value +def nxv16f32 : ValueType<512, 101>; // n x 16 x f32 vector value +def nxv1f64 : ValueType<64, 102>; // n x 1 x f64 vector value +def nxv2f64 : ValueType<128, 103>; // n x 2 x f64 vector value +def nxv4f64 : ValueType<256, 104>; // n x 4 x f64 vector value +def nxv8f64 : ValueType<512, 105>; // n x 8 x f64 vector value + +def x86mmx : ValueType<64 , 106>; // X86 MMX value +def FlagVT : ValueType<0 , 107>; // Pre-RA sched glue +def isVoid : ValueType<0 , 108>; // Produces no value +def untyped: ValueType<8 , 109>; // Produces an untyped value +def token : ValueType<0 , 248>; // TokenTy +def MetadataVT: ValueType<0, 249>; // Metadata // Pseudo valuetype mapped to the current pointer size to any address space. // Should only be used in TableGen. -def iPTRAny : ValueType<0, 122>; +def iPTRAny : ValueType<0, 250>; // Pseudo valuetype to represent "vector of any size" -def vAny : ValueType<0 , 123>; +def vAny : ValueType<0 , 251>; // Pseudo valuetype to represent "float of any format" -def fAny : ValueType<0 , 124>; +def fAny : ValueType<0 , 252>; // Pseudo valuetype to represent "integer of any bit width" -def iAny : ValueType<0 , 125>; +def iAny : ValueType<0 , 253>; // Pseudo valuetype mapped to the current pointer size. -def iPTR : ValueType<0 , 126>; +def iPTR : ValueType<0 , 254>; // Pseudo valuetype to represent "any type of any size". -def Any : ValueType<0 , 127>; +def Any : ValueType<0 , 255>; Index: include/llvm/DebugInfo/DWARF/DWARFContext.h =================================================================== --- include/llvm/DebugInfo/DWARF/DWARFContext.h +++ include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -50,6 +50,11 @@ // entire size of the debug info sections. typedef DenseMap> RelocAddrMap; +/// Reads a value from data extractor and applies a relocation to the result if +/// one exists for the given offset. +uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs); + /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug /// information parsing. The actual data is supplied through pure virtual Index: include/llvm/IR/ConstantRange.h =================================================================== --- include/llvm/IR/ConstantRange.h +++ include/llvm/IR/ConstantRange.h @@ -93,7 +93,7 @@ /// /// NB! The returned set does *not* contain **all** possible values of X for /// which "X BinOpC Y" does not wrap -- some viable values of X may be - /// missing, so you cannot use this to contrain X's range. E.g. in the last + /// missing, so you cannot use this to constrain X's range. E.g. in the last /// example, "(-2) + 1" is both nsw and nuw (so the "X" could be -2), but (-2) /// is not in the set returned. /// Index: include/llvm/IR/DIBuilder.h =================================================================== --- include/llvm/IR/DIBuilder.h +++ include/llvm/IR/DIBuilder.h @@ -778,6 +778,9 @@ } }; + // Create wrappers for C Binding types (see CBindingWrapping.h). 
+ DEFINE_ISA_CONVERSION_FUNCTIONS(DIBuilder, LLVMDIBuilderRef) + } // end namespace llvm #endif // LLVM_IR_DIBUILDER_H Index: include/llvm/IR/PatternMatch.h =================================================================== --- include/llvm/IR/PatternMatch.h +++ include/llvm/IR/PatternMatch.h @@ -267,15 +267,15 @@ } inline api_pred_ty m_AllOnes(const APInt *&V) { return V; } -struct is_sign_bit { - bool isValue(const APInt &C) { return C.isSignBit(); } +struct is_sign_mask { + bool isValue(const APInt &C) { return C.isSignMask(); } }; /// \brief Match an integer or vector with only the sign bit(s) set. -inline cst_pred_ty m_SignBit() { - return cst_pred_ty(); +inline cst_pred_ty m_SignMask() { + return cst_pred_ty(); } -inline api_pred_ty m_SignBit(const APInt *&V) { return V; } +inline api_pred_ty m_SignMask(const APInt *&V) { return V; } struct is_power2 { bool isValue(const APInt &C) { return C.isPowerOf2(); } Index: include/llvm/ObjectYAML/DWARFYAML.h =================================================================== --- include/llvm/ObjectYAML/DWARFYAML.h +++ include/llvm/ObjectYAML/DWARFYAML.h @@ -236,7 +236,7 @@ static void mapping(IO &IO, DWARFYAML::InitialLength &DWARF); }; -#define HANDLE_DW_TAG(unused, name) \ +#define HANDLE_DW_TAG(unused, name, unused2, unused3) \ io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name); template <> struct ScalarEnumerationTraits { @@ -266,7 +266,7 @@ } }; -#define HANDLE_DW_AT(unused, name) \ +#define HANDLE_DW_AT(unused, name, unused2, unused3) \ io.enumCase(value, "DW_AT_" #name, dwarf::DW_AT_##name); template <> struct ScalarEnumerationTraits { @@ -276,7 +276,7 @@ } }; -#define HANDLE_DW_FORM(unused, name) \ +#define HANDLE_DW_FORM(unused, name, unused2, unused3) \ io.enumCase(value, "DW_FORM_" #name, dwarf::DW_FORM_##name); template <> struct ScalarEnumerationTraits { Index: include/llvm/PassSupport.h =================================================================== --- include/llvm/PassSupport.h +++ include/llvm/PassSupport.h @@ -93,11 +93,7 @@ /// static RegisterPass tmp("passopt", "My Pass Name"); /// /// This statement will cause your pass to be created by calling the default -/// constructor exposed by the pass. If you have a different constructor that -/// must be called, create a global constructor function (which takes the -/// arguments you need and returns a Pass*) and register your pass like this: -/// -/// static RegisterPass tmp("passopt", "My Name"); +/// constructor exposed by the pass. /// template struct RegisterPass : public PassInfo { // Register Pass using default constructor... 
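[Editor's note] For reference, a minimal sketch of how the renamed matcher from the PatternMatch.h hunk above is meant to be used. It assumes the updated PatternMatch API (m_SignMask replacing m_SignBit); the helper name matchSignMaskAdd and its surrounding code are illustrative only and not part of this patch. The same fold appears later in the lib/Analysis/InstructionSimplify.cpp hunk.

    // Illustrative sketch only; assumes the updated PatternMatch API above.
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns Y when Add is "add nuw/nsw (xor Y, SignMask), SignMask", i.e. the
    // xor only cleared the sign bit that the no-wrap add is guaranteed to set.
    static Value *matchSignMaskAdd(BinaryOperator *Add) {
      Value *Y;
      if ((Add->hasNoSignedWrap() || Add->hasNoUnsignedWrap()) &&
          match(Add->getOperand(1), m_SignMask()) &&
          match(Add->getOperand(0), m_Xor(m_Value(Y), m_SignMask())))
        return Y;
      return nullptr;
    }
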
Index: include/llvm/Support/ArrayRecycler.h =================================================================== --- include/llvm/Support/ArrayRecycler.h +++ include/llvm/Support/ArrayRecycler.h @@ -47,7 +47,9 @@ FreeList *Entry = Bucket[Idx]; if (!Entry) return nullptr; + __asan_unpoison_memory_region(Entry, Capacity::get(Idx).getSize()); Bucket[Idx] = Entry->Next; + __msan_allocated_memory(Entry, Capacity::get(Idx).getSize()); return reinterpret_cast(Entry); } @@ -59,6 +61,7 @@ Bucket.resize(size_t(Idx) + 1); Entry->Next = Bucket[Idx]; Bucket[Idx] = Entry; + __asan_poison_memory_region(Ptr, Capacity::get(Idx).getSize()); } public: Index: include/llvm/Support/BinaryStreamArray.h =================================================================== --- include/llvm/Support/BinaryStreamArray.h +++ include/llvm/Support/BinaryStreamArray.h @@ -162,6 +162,11 @@ return ThisValue; } + ValueType &operator*() { + assert(Array && !HasError); + return ThisValue; + } + IterType &operator+=(unsigned N) { for (unsigned I = 0; I < N; ++I) { // We are done with the current record, discard it so that we are Index: include/llvm/Support/BranchProbability.h =================================================================== --- include/llvm/Support/BranchProbability.h +++ include/llvm/Support/BranchProbability.h @@ -112,6 +112,13 @@ return *this; } + BranchProbability &operator*=(uint32_t RHS) { + assert(N != UnknownN && + "Unknown probability cannot participate in arithmetics."); + N = (uint64_t(N) * RHS > D) ? D : N * RHS; + return *this; + } + BranchProbability &operator/=(uint32_t RHS) { assert(N != UnknownN && "Unknown probability cannot participate in arithmetics."); @@ -135,6 +142,11 @@ return Prob *= RHS; } + BranchProbability operator*(uint32_t RHS) const { + BranchProbability Prob(*this); + return Prob *= RHS; + } + BranchProbability operator/(uint32_t RHS) const { BranchProbability Prob(*this); return Prob /= RHS; Index: include/llvm/Support/Dwarf.h =================================================================== --- include/llvm/Support/Dwarf.h +++ include/llvm/Support/Dwarf.h @@ -46,7 +46,15 @@ DWARF_VERSION = 4, // Default dwarf version we output. DW_PUBTYPES_VERSION = 2, // Section version number for .debug_pubtypes. DW_PUBNAMES_VERSION = 2, // Section version number for .debug_pubnames. - DW_ARANGES_VERSION = 2 // Section version number for .debug_aranges. + DW_ARANGES_VERSION = 2, // Section version number for .debug_aranges. + // Identifiers we use to distinguish vendor extensions. + DWARF_VENDOR_DWARF = 0, // Defined in v2 or later of the DWARF standard. + DWARF_VENDOR_APPLE = 1, + DWARF_VENDOR_BORLAND = 2, + DWARF_VENDOR_GNU = 3, + DWARF_VENDOR_GOOGLE = 4, + DWARF_VENDOR_LLVM = 5, + DWARF_VENDOR_MIPS = 6 }; // Special ID values that distinguish a CIE from a FDE in DWARF CFI. @@ -55,7 +63,7 @@ const uint64_t DW64_CIE_ID = UINT64_MAX; enum Tag : uint16_t { -#define HANDLE_DW_TAG(ID, NAME) DW_TAG_##NAME = ID, +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) DW_TAG_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_TAG_lo_user = 0x4080, DW_TAG_hi_user = 0xffff, @@ -92,20 +100,20 @@ /// Attributes. 
enum Attribute : uint16_t { -#define HANDLE_DW_AT(ID, NAME) DW_AT_##NAME = ID, +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) DW_AT_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_AT_lo_user = 0x2000, DW_AT_hi_user = 0x3fff, }; enum Form : uint16_t { -#define HANDLE_DW_FORM(ID, NAME) DW_FORM_##NAME = ID, +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) DW_FORM_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF. }; enum LocationAtom { -#define HANDLE_DW_OP(ID, NAME) DW_OP_##NAME = ID, +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) DW_OP_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, @@ -113,7 +121,7 @@ }; enum TypeKind { -#define HANDLE_DW_ATE(ID, NAME) DW_ATE_##NAME = ID, +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_ATE_lo_user = 0x80, DW_ATE_hi_user = 0xff @@ -164,7 +172,7 @@ }; enum SourceLanguage { -#define HANDLE_DW_LANG(ID, NAME) DW_LANG_##NAME = ID, +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) DW_LANG_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_LANG_lo_user = 0x8000, DW_LANG_hi_user = 0xffff @@ -220,8 +228,8 @@ DW_LNE_hi_user = 0xff }; -enum LinerNumberEntryFormat { -#define HANDLE_DW_LNCT(ID, NAME) DW_DEFAULTED_##NAME = ID, +enum LineNumberEntryFormat { +#define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID, #include "llvm/Support/Dwarf.def" DW_LNCT_lo_user = 0x2000, DW_LNCT_hi_user = 0x3fff, @@ -406,6 +414,40 @@ unsigned getMacinfo(StringRef MacinfoString); /// @} +/// \defgroup DwarfConstantsVersioning Dwarf version for constants +/// +/// For constants defined by DWARF, returns the DWARF version when the constant +/// was first defined. For vendor extensions, if there is a version-related +/// policy for when to emit it, returns a version number for that policy. +/// Otherwise returns 0. +/// +/// @{ +unsigned TagVersion(Tag T); +unsigned AttributeVersion(Attribute A); +unsigned FormVersion(Form F); +unsigned OperationVersion(LocationAtom O); +unsigned AttributeEncodingVersion(TypeKind E); +unsigned LanguageVersion(SourceLanguage L); +/// @} + +/// \defgroup DwarfConstantsVendor Dwarf "vendor" for constants +/// +/// These functions return an identifier describing "who" defined the constant, +/// either the DWARF standard itself or the vendor who defined the extension. +/// +/// @{ +unsigned TagVendor(Tag T); +unsigned AttributeVendor(Attribute A); +unsigned FormVendor(Form F); +unsigned OperationVendor(LocationAtom O); +unsigned AttributeEncodingVendor(TypeKind E); +unsigned LanguageVendor(SourceLanguage L); +/// @} + +/// Tells whether the specified form is defined in the specified version, +/// or is an extension if extensions are allowed. +bool isValidFormForVersion(Form F, unsigned Version, bool ExtensionsOk = true); + /// \brief Returns the symbolic string representing Val when used as a value /// for attribute Attr. 
StringRef AttributeValueString(uint16_t Attr, unsigned Val); Index: include/llvm/Support/Dwarf.def =================================================================== --- include/llvm/Support/Dwarf.def +++ include/llvm/Support/Dwarf.def @@ -25,27 +25,27 @@ #endif #ifndef HANDLE_DW_TAG -#define HANDLE_DW_TAG(ID, NAME) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_AT -#define HANDLE_DW_AT(ID, NAME) +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_FORM -#define HANDLE_DW_FORM(ID, NAME) +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_OP -#define HANDLE_DW_OP(ID, NAME) +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_LANG -#define HANDLE_DW_LANG(ID, NAME) +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_ATE -#define HANDLE_DW_ATE(ID, NAME) +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) #endif #ifndef HANDLE_DW_VIRTUALITY @@ -92,591 +92,591 @@ #define HANDLE_DW_UT(ID, NAME) #endif -HANDLE_DW_TAG(0x0000, null) -HANDLE_DW_TAG(0x0001, array_type) -HANDLE_DW_TAG(0x0002, class_type) -HANDLE_DW_TAG(0x0003, entry_point) -HANDLE_DW_TAG(0x0004, enumeration_type) -HANDLE_DW_TAG(0x0005, formal_parameter) -HANDLE_DW_TAG(0x0008, imported_declaration) -HANDLE_DW_TAG(0x000a, label) -HANDLE_DW_TAG(0x000b, lexical_block) -HANDLE_DW_TAG(0x000d, member) -HANDLE_DW_TAG(0x000f, pointer_type) -HANDLE_DW_TAG(0x0010, reference_type) -HANDLE_DW_TAG(0x0011, compile_unit) -HANDLE_DW_TAG(0x0012, string_type) -HANDLE_DW_TAG(0x0013, structure_type) -HANDLE_DW_TAG(0x0015, subroutine_type) -HANDLE_DW_TAG(0x0016, typedef) -HANDLE_DW_TAG(0x0017, union_type) -HANDLE_DW_TAG(0x0018, unspecified_parameters) -HANDLE_DW_TAG(0x0019, variant) -HANDLE_DW_TAG(0x001a, common_block) -HANDLE_DW_TAG(0x001b, common_inclusion) -HANDLE_DW_TAG(0x001c, inheritance) -HANDLE_DW_TAG(0x001d, inlined_subroutine) -HANDLE_DW_TAG(0x001e, module) -HANDLE_DW_TAG(0x001f, ptr_to_member_type) -HANDLE_DW_TAG(0x0020, set_type) -HANDLE_DW_TAG(0x0021, subrange_type) -HANDLE_DW_TAG(0x0022, with_stmt) -HANDLE_DW_TAG(0x0023, access_declaration) -HANDLE_DW_TAG(0x0024, base_type) -HANDLE_DW_TAG(0x0025, catch_block) -HANDLE_DW_TAG(0x0026, const_type) -HANDLE_DW_TAG(0x0027, constant) -HANDLE_DW_TAG(0x0028, enumerator) -HANDLE_DW_TAG(0x0029, file_type) -HANDLE_DW_TAG(0x002a, friend) -HANDLE_DW_TAG(0x002b, namelist) -HANDLE_DW_TAG(0x002c, namelist_item) -HANDLE_DW_TAG(0x002d, packed_type) -HANDLE_DW_TAG(0x002e, subprogram) -HANDLE_DW_TAG(0x002f, template_type_parameter) -HANDLE_DW_TAG(0x0030, template_value_parameter) -HANDLE_DW_TAG(0x0031, thrown_type) -HANDLE_DW_TAG(0x0032, try_block) -HANDLE_DW_TAG(0x0033, variant_part) -HANDLE_DW_TAG(0x0034, variable) -HANDLE_DW_TAG(0x0035, volatile_type) +HANDLE_DW_TAG(0x0000, null, 2, DWARF) +HANDLE_DW_TAG(0x0001, array_type, 2, DWARF) +HANDLE_DW_TAG(0x0002, class_type, 2, DWARF) +HANDLE_DW_TAG(0x0003, entry_point, 2, DWARF) +HANDLE_DW_TAG(0x0004, enumeration_type, 2, DWARF) +HANDLE_DW_TAG(0x0005, formal_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0008, imported_declaration, 2, DWARF) +HANDLE_DW_TAG(0x000a, label, 2, DWARF) +HANDLE_DW_TAG(0x000b, lexical_block, 2, DWARF) +HANDLE_DW_TAG(0x000d, member, 2, DWARF) +HANDLE_DW_TAG(0x000f, pointer_type, 2, DWARF) +HANDLE_DW_TAG(0x0010, reference_type, 2, DWARF) +HANDLE_DW_TAG(0x0011, compile_unit, 2, DWARF) +HANDLE_DW_TAG(0x0012, string_type, 2, DWARF) +HANDLE_DW_TAG(0x0013, structure_type, 2, DWARF) +HANDLE_DW_TAG(0x0015, subroutine_type, 2, DWARF) 
+HANDLE_DW_TAG(0x0016, typedef, 2, DWARF) +HANDLE_DW_TAG(0x0017, union_type, 2, DWARF) +HANDLE_DW_TAG(0x0018, unspecified_parameters, 2, DWARF) +HANDLE_DW_TAG(0x0019, variant, 2, DWARF) +HANDLE_DW_TAG(0x001a, common_block, 2, DWARF) +HANDLE_DW_TAG(0x001b, common_inclusion, 2, DWARF) +HANDLE_DW_TAG(0x001c, inheritance, 2, DWARF) +HANDLE_DW_TAG(0x001d, inlined_subroutine, 2, DWARF) +HANDLE_DW_TAG(0x001e, module, 2, DWARF) +HANDLE_DW_TAG(0x001f, ptr_to_member_type, 2, DWARF) +HANDLE_DW_TAG(0x0020, set_type, 2, DWARF) +HANDLE_DW_TAG(0x0021, subrange_type, 2, DWARF) +HANDLE_DW_TAG(0x0022, with_stmt, 2, DWARF) +HANDLE_DW_TAG(0x0023, access_declaration, 2, DWARF) +HANDLE_DW_TAG(0x0024, base_type, 2, DWARF) +HANDLE_DW_TAG(0x0025, catch_block, 2, DWARF) +HANDLE_DW_TAG(0x0026, const_type, 2, DWARF) +HANDLE_DW_TAG(0x0027, constant, 2, DWARF) +HANDLE_DW_TAG(0x0028, enumerator, 2, DWARF) +HANDLE_DW_TAG(0x0029, file_type, 2, DWARF) +HANDLE_DW_TAG(0x002a, friend, 2, DWARF) +HANDLE_DW_TAG(0x002b, namelist, 2, DWARF) +HANDLE_DW_TAG(0x002c, namelist_item, 2, DWARF) +HANDLE_DW_TAG(0x002d, packed_type, 2, DWARF) +HANDLE_DW_TAG(0x002e, subprogram, 2, DWARF) +HANDLE_DW_TAG(0x002f, template_type_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0030, template_value_parameter, 2, DWARF) +HANDLE_DW_TAG(0x0031, thrown_type, 2, DWARF) +HANDLE_DW_TAG(0x0032, try_block, 2, DWARF) +HANDLE_DW_TAG(0x0033, variant_part, 2, DWARF) +HANDLE_DW_TAG(0x0034, variable, 2, DWARF) +HANDLE_DW_TAG(0x0035, volatile_type, 2, DWARF) // New in DWARF v3: -HANDLE_DW_TAG(0x0036, dwarf_procedure) -HANDLE_DW_TAG(0x0037, restrict_type) -HANDLE_DW_TAG(0x0038, interface_type) -HANDLE_DW_TAG(0x0039, namespace) -HANDLE_DW_TAG(0x003a, imported_module) -HANDLE_DW_TAG(0x003b, unspecified_type) -HANDLE_DW_TAG(0x003c, partial_unit) -HANDLE_DW_TAG(0x003d, imported_unit) -HANDLE_DW_TAG(0x003f, condition) -HANDLE_DW_TAG(0x0040, shared_type) +HANDLE_DW_TAG(0x0036, dwarf_procedure, 3, DWARF) +HANDLE_DW_TAG(0x0037, restrict_type, 3, DWARF) +HANDLE_DW_TAG(0x0038, interface_type, 3, DWARF) +HANDLE_DW_TAG(0x0039, namespace, 3, DWARF) +HANDLE_DW_TAG(0x003a, imported_module, 3, DWARF) +HANDLE_DW_TAG(0x003b, unspecified_type, 3, DWARF) +HANDLE_DW_TAG(0x003c, partial_unit, 3, DWARF) +HANDLE_DW_TAG(0x003d, imported_unit, 3, DWARF) +HANDLE_DW_TAG(0x003f, condition, 3, DWARF) +HANDLE_DW_TAG(0x0040, shared_type, 3, DWARF) // New in DWARF v4: -HANDLE_DW_TAG(0x0041, type_unit) -HANDLE_DW_TAG(0x0042, rvalue_reference_type) -HANDLE_DW_TAG(0x0043, template_alias) +HANDLE_DW_TAG(0x0041, type_unit, 4, DWARF) +HANDLE_DW_TAG(0x0042, rvalue_reference_type, 4, DWARF) +HANDLE_DW_TAG(0x0043, template_alias, 4, DWARF) // New in DWARF v5: -HANDLE_DW_TAG(0x0044, coarray_type) -HANDLE_DW_TAG(0x0045, generic_subrange) -HANDLE_DW_TAG(0x0046, dynamic_type) -HANDLE_DW_TAG(0x0047, atomic_type) -HANDLE_DW_TAG(0x0048, call_site) -HANDLE_DW_TAG(0x0049, call_site_parameter) -HANDLE_DW_TAG(0x004a, skeleton_unit) -HANDLE_DW_TAG(0x004b, immutable_type) +HANDLE_DW_TAG(0x0044, coarray_type, 5, DWARF) +HANDLE_DW_TAG(0x0045, generic_subrange, 5, DWARF) +HANDLE_DW_TAG(0x0046, dynamic_type, 5, DWARF) +HANDLE_DW_TAG(0x0047, atomic_type, 5, DWARF) +HANDLE_DW_TAG(0x0048, call_site, 5, DWARF) +HANDLE_DW_TAG(0x0049, call_site_parameter, 5, DWARF) +HANDLE_DW_TAG(0x004a, skeleton_unit, 5, DWARF) +HANDLE_DW_TAG(0x004b, immutable_type, 5, DWARF) // Vendor extensions: -HANDLE_DW_TAG(0x4081, MIPS_loop) -HANDLE_DW_TAG(0x4101, format_label) -HANDLE_DW_TAG(0x4102, function_template) -HANDLE_DW_TAG(0x4103, class_template) 
-HANDLE_DW_TAG(0x4106, GNU_template_template_param) -HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack) -HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack) -HANDLE_DW_TAG(0x4200, APPLE_property) -HANDLE_DW_TAG(0xb000, BORLAND_property) -HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string) -HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array) -HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set) -HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant) +HANDLE_DW_TAG(0x4081, MIPS_loop, 0, MIPS) +HANDLE_DW_TAG(0x4101, format_label, 0, GNU) +HANDLE_DW_TAG(0x4102, function_template, 0, GNU) +HANDLE_DW_TAG(0x4103, class_template, 0, GNU) +HANDLE_DW_TAG(0x4106, GNU_template_template_param, 0, GNU) +HANDLE_DW_TAG(0x4107, GNU_template_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4108, GNU_formal_parameter_pack, 0, GNU) +HANDLE_DW_TAG(0x4200, APPLE_property, 0, APPLE) +HANDLE_DW_TAG(0xb000, BORLAND_property, 0, BORLAND) +HANDLE_DW_TAG(0xb001, BORLAND_Delphi_string, 0, BORLAND) +HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array, 0, BORLAND) +HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set, 0, BORLAND) +HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant, 0, BORLAND) // Attributes. -HANDLE_DW_AT(0x01, sibling) -HANDLE_DW_AT(0x02, location) -HANDLE_DW_AT(0x03, name) -HANDLE_DW_AT(0x09, ordering) -HANDLE_DW_AT(0x0b, byte_size) -HANDLE_DW_AT(0x0c, bit_offset) -HANDLE_DW_AT(0x0d, bit_size) -HANDLE_DW_AT(0x10, stmt_list) -HANDLE_DW_AT(0x11, low_pc) -HANDLE_DW_AT(0x12, high_pc) -HANDLE_DW_AT(0x13, language) -HANDLE_DW_AT(0x15, discr) -HANDLE_DW_AT(0x16, discr_value) -HANDLE_DW_AT(0x17, visibility) -HANDLE_DW_AT(0x18, import) -HANDLE_DW_AT(0x19, string_length) -HANDLE_DW_AT(0x1a, common_reference) -HANDLE_DW_AT(0x1b, comp_dir) -HANDLE_DW_AT(0x1c, const_value) -HANDLE_DW_AT(0x1d, containing_type) -HANDLE_DW_AT(0x1e, default_value) -HANDLE_DW_AT(0x20, inline) -HANDLE_DW_AT(0x21, is_optional) -HANDLE_DW_AT(0x22, lower_bound) -HANDLE_DW_AT(0x25, producer) -HANDLE_DW_AT(0x27, prototyped) -HANDLE_DW_AT(0x2a, return_addr) -HANDLE_DW_AT(0x2c, start_scope) -HANDLE_DW_AT(0x2e, bit_stride) -HANDLE_DW_AT(0x2f, upper_bound) -HANDLE_DW_AT(0x31, abstract_origin) -HANDLE_DW_AT(0x32, accessibility) -HANDLE_DW_AT(0x33, address_class) -HANDLE_DW_AT(0x34, artificial) -HANDLE_DW_AT(0x35, base_types) -HANDLE_DW_AT(0x36, calling_convention) -HANDLE_DW_AT(0x37, count) -HANDLE_DW_AT(0x38, data_member_location) -HANDLE_DW_AT(0x39, decl_column) -HANDLE_DW_AT(0x3a, decl_file) -HANDLE_DW_AT(0x3b, decl_line) -HANDLE_DW_AT(0x3c, declaration) -HANDLE_DW_AT(0x3d, discr_list) -HANDLE_DW_AT(0x3e, encoding) -HANDLE_DW_AT(0x3f, external) -HANDLE_DW_AT(0x40, frame_base) -HANDLE_DW_AT(0x41, friend) -HANDLE_DW_AT(0x42, identifier_case) -HANDLE_DW_AT(0x43, macro_info) -HANDLE_DW_AT(0x44, namelist_item) -HANDLE_DW_AT(0x45, priority) -HANDLE_DW_AT(0x46, segment) -HANDLE_DW_AT(0x47, specification) -HANDLE_DW_AT(0x48, static_link) -HANDLE_DW_AT(0x49, type) -HANDLE_DW_AT(0x4a, use_location) -HANDLE_DW_AT(0x4b, variable_parameter) -HANDLE_DW_AT(0x4c, virtuality) -HANDLE_DW_AT(0x4d, vtable_elem_location) +HANDLE_DW_AT(0x01, sibling, 2, DWARF) +HANDLE_DW_AT(0x02, location, 2, DWARF) +HANDLE_DW_AT(0x03, name, 2, DWARF) +HANDLE_DW_AT(0x09, ordering, 2, DWARF) +HANDLE_DW_AT(0x0b, byte_size, 2, DWARF) +HANDLE_DW_AT(0x0c, bit_offset, 2, DWARF) +HANDLE_DW_AT(0x0d, bit_size, 2, DWARF) +HANDLE_DW_AT(0x10, stmt_list, 2, DWARF) +HANDLE_DW_AT(0x11, low_pc, 2, DWARF) +HANDLE_DW_AT(0x12, high_pc, 2, DWARF) +HANDLE_DW_AT(0x13, language, 2, DWARF) +HANDLE_DW_AT(0x15, discr, 2, DWARF) +HANDLE_DW_AT(0x16, discr_value, 2, 
DWARF) +HANDLE_DW_AT(0x17, visibility, 2, DWARF) +HANDLE_DW_AT(0x18, import, 2, DWARF) +HANDLE_DW_AT(0x19, string_length, 2, DWARF) +HANDLE_DW_AT(0x1a, common_reference, 2, DWARF) +HANDLE_DW_AT(0x1b, comp_dir, 2, DWARF) +HANDLE_DW_AT(0x1c, const_value, 2, DWARF) +HANDLE_DW_AT(0x1d, containing_type, 2, DWARF) +HANDLE_DW_AT(0x1e, default_value, 2, DWARF) +HANDLE_DW_AT(0x20, inline, 2, DWARF) +HANDLE_DW_AT(0x21, is_optional, 2, DWARF) +HANDLE_DW_AT(0x22, lower_bound, 2, DWARF) +HANDLE_DW_AT(0x25, producer, 2, DWARF) +HANDLE_DW_AT(0x27, prototyped, 2, DWARF) +HANDLE_DW_AT(0x2a, return_addr, 2, DWARF) +HANDLE_DW_AT(0x2c, start_scope, 2, DWARF) +HANDLE_DW_AT(0x2e, bit_stride, 2, DWARF) +HANDLE_DW_AT(0x2f, upper_bound, 2, DWARF) +HANDLE_DW_AT(0x31, abstract_origin, 2, DWARF) +HANDLE_DW_AT(0x32, accessibility, 2, DWARF) +HANDLE_DW_AT(0x33, address_class, 2, DWARF) +HANDLE_DW_AT(0x34, artificial, 2, DWARF) +HANDLE_DW_AT(0x35, base_types, 2, DWARF) +HANDLE_DW_AT(0x36, calling_convention, 2, DWARF) +HANDLE_DW_AT(0x37, count, 2, DWARF) +HANDLE_DW_AT(0x38, data_member_location, 2, DWARF) +HANDLE_DW_AT(0x39, decl_column, 2, DWARF) +HANDLE_DW_AT(0x3a, decl_file, 2, DWARF) +HANDLE_DW_AT(0x3b, decl_line, 2, DWARF) +HANDLE_DW_AT(0x3c, declaration, 2, DWARF) +HANDLE_DW_AT(0x3d, discr_list, 2, DWARF) +HANDLE_DW_AT(0x3e, encoding, 2, DWARF) +HANDLE_DW_AT(0x3f, external, 2, DWARF) +HANDLE_DW_AT(0x40, frame_base, 2, DWARF) +HANDLE_DW_AT(0x41, friend, 2, DWARF) +HANDLE_DW_AT(0x42, identifier_case, 2, DWARF) +HANDLE_DW_AT(0x43, macro_info, 2, DWARF) +HANDLE_DW_AT(0x44, namelist_item, 2, DWARF) +HANDLE_DW_AT(0x45, priority, 2, DWARF) +HANDLE_DW_AT(0x46, segment, 2, DWARF) +HANDLE_DW_AT(0x47, specification, 2, DWARF) +HANDLE_DW_AT(0x48, static_link, 2, DWARF) +HANDLE_DW_AT(0x49, type, 2, DWARF) +HANDLE_DW_AT(0x4a, use_location, 2, DWARF) +HANDLE_DW_AT(0x4b, variable_parameter, 2, DWARF) +HANDLE_DW_AT(0x4c, virtuality, 2, DWARF) +HANDLE_DW_AT(0x4d, vtable_elem_location, 2, DWARF) // New in DWARF v3: -HANDLE_DW_AT(0x4e, allocated) -HANDLE_DW_AT(0x4f, associated) -HANDLE_DW_AT(0x50, data_location) -HANDLE_DW_AT(0x51, byte_stride) -HANDLE_DW_AT(0x52, entry_pc) -HANDLE_DW_AT(0x53, use_UTF8) -HANDLE_DW_AT(0x54, extension) -HANDLE_DW_AT(0x55, ranges) -HANDLE_DW_AT(0x56, trampoline) -HANDLE_DW_AT(0x57, call_column) -HANDLE_DW_AT(0x58, call_file) -HANDLE_DW_AT(0x59, call_line) -HANDLE_DW_AT(0x5a, description) -HANDLE_DW_AT(0x5b, binary_scale) -HANDLE_DW_AT(0x5c, decimal_scale) -HANDLE_DW_AT(0x5d, small) -HANDLE_DW_AT(0x5e, decimal_sign) -HANDLE_DW_AT(0x5f, digit_count) -HANDLE_DW_AT(0x60, picture_string) -HANDLE_DW_AT(0x61, mutable) -HANDLE_DW_AT(0x62, threads_scaled) -HANDLE_DW_AT(0x63, explicit) -HANDLE_DW_AT(0x64, object_pointer) -HANDLE_DW_AT(0x65, endianity) -HANDLE_DW_AT(0x66, elemental) -HANDLE_DW_AT(0x67, pure) -HANDLE_DW_AT(0x68, recursive) +HANDLE_DW_AT(0x4e, allocated, 3, DWARF) +HANDLE_DW_AT(0x4f, associated, 3, DWARF) +HANDLE_DW_AT(0x50, data_location, 3, DWARF) +HANDLE_DW_AT(0x51, byte_stride, 3, DWARF) +HANDLE_DW_AT(0x52, entry_pc, 3, DWARF) +HANDLE_DW_AT(0x53, use_UTF8, 3, DWARF) +HANDLE_DW_AT(0x54, extension, 3, DWARF) +HANDLE_DW_AT(0x55, ranges, 3, DWARF) +HANDLE_DW_AT(0x56, trampoline, 3, DWARF) +HANDLE_DW_AT(0x57, call_column, 3, DWARF) +HANDLE_DW_AT(0x58, call_file, 3, DWARF) +HANDLE_DW_AT(0x59, call_line, 3, DWARF) +HANDLE_DW_AT(0x5a, description, 3, DWARF) +HANDLE_DW_AT(0x5b, binary_scale, 3, DWARF) +HANDLE_DW_AT(0x5c, decimal_scale, 3, DWARF) +HANDLE_DW_AT(0x5d, small, 3, DWARF) +HANDLE_DW_AT(0x5e, 
decimal_sign, 3, DWARF) +HANDLE_DW_AT(0x5f, digit_count, 3, DWARF) +HANDLE_DW_AT(0x60, picture_string, 3, DWARF) +HANDLE_DW_AT(0x61, mutable, 3, DWARF) +HANDLE_DW_AT(0x62, threads_scaled, 3, DWARF) +HANDLE_DW_AT(0x63, explicit, 3, DWARF) +HANDLE_DW_AT(0x64, object_pointer, 3, DWARF) +HANDLE_DW_AT(0x65, endianity, 3, DWARF) +HANDLE_DW_AT(0x66, elemental, 3, DWARF) +HANDLE_DW_AT(0x67, pure, 3, DWARF) +HANDLE_DW_AT(0x68, recursive, 3, DWARF) // New in DWARF v4: -HANDLE_DW_AT(0x69, signature) -HANDLE_DW_AT(0x6a, main_subprogram) -HANDLE_DW_AT(0x6b, data_bit_offset) -HANDLE_DW_AT(0x6c, const_expr) -HANDLE_DW_AT(0x6d, enum_class) -HANDLE_DW_AT(0x6e, linkage_name) +HANDLE_DW_AT(0x69, signature, 4, DWARF) +HANDLE_DW_AT(0x6a, main_subprogram, 4, DWARF) +HANDLE_DW_AT(0x6b, data_bit_offset, 4, DWARF) +HANDLE_DW_AT(0x6c, const_expr, 4, DWARF) +HANDLE_DW_AT(0x6d, enum_class, 4, DWARF) +HANDLE_DW_AT(0x6e, linkage_name, 4, DWARF) // New in DWARF v5: -HANDLE_DW_AT(0x6f, string_length_bit_size) -HANDLE_DW_AT(0x70, string_length_byte_size) -HANDLE_DW_AT(0x71, rank) -HANDLE_DW_AT(0x72, str_offsets_base) -HANDLE_DW_AT(0x73, addr_base) -HANDLE_DW_AT(0x74, rnglists_base) -HANDLE_DW_AT(0x75, dwo_id) ///< Retracted from DWARF 5. -HANDLE_DW_AT(0x76, dwo_name) -HANDLE_DW_AT(0x77, reference) -HANDLE_DW_AT(0x78, rvalue_reference) -HANDLE_DW_AT(0x79, macros) -HANDLE_DW_AT(0x7a, call_all_calls) -HANDLE_DW_AT(0x7b, call_all_source_calls) -HANDLE_DW_AT(0x7c, call_all_tail_calls) -HANDLE_DW_AT(0x7d, call_return_pc) -HANDLE_DW_AT(0x7e, call_value) -HANDLE_DW_AT(0x7f, call_origin) -HANDLE_DW_AT(0x80, call_parameter) -HANDLE_DW_AT(0x81, call_pc) -HANDLE_DW_AT(0x82, call_tail_call) -HANDLE_DW_AT(0x83, call_target) -HANDLE_DW_AT(0x84, call_target_clobbered) -HANDLE_DW_AT(0x85, call_data_location) -HANDLE_DW_AT(0x86, call_data_value) -HANDLE_DW_AT(0x87, noreturn) -HANDLE_DW_AT(0x88, alignment) -HANDLE_DW_AT(0x89, export_symbols) -HANDLE_DW_AT(0x8a, deleted) -HANDLE_DW_AT(0x8b, defaulted) -HANDLE_DW_AT(0x8c, loclists_base) +HANDLE_DW_AT(0x6f, string_length_bit_size, 5, DWARF) +HANDLE_DW_AT(0x70, string_length_byte_size, 5, DWARF) +HANDLE_DW_AT(0x71, rank, 5, DWARF) +HANDLE_DW_AT(0x72, str_offsets_base, 5, DWARF) +HANDLE_DW_AT(0x73, addr_base, 5, DWARF) +HANDLE_DW_AT(0x74, rnglists_base, 5, DWARF) +HANDLE_DW_AT(0x75, dwo_id, 0, DWARF) ///< Retracted from DWARF v5. 
+HANDLE_DW_AT(0x76, dwo_name, 5, DWARF) +HANDLE_DW_AT(0x77, reference, 5, DWARF) +HANDLE_DW_AT(0x78, rvalue_reference, 5, DWARF) +HANDLE_DW_AT(0x79, macros, 5, DWARF) +HANDLE_DW_AT(0x7a, call_all_calls, 5, DWARF) +HANDLE_DW_AT(0x7b, call_all_source_calls, 5, DWARF) +HANDLE_DW_AT(0x7c, call_all_tail_calls, 5, DWARF) +HANDLE_DW_AT(0x7d, call_return_pc, 5, DWARF) +HANDLE_DW_AT(0x7e, call_value, 5, DWARF) +HANDLE_DW_AT(0x7f, call_origin, 5, DWARF) +HANDLE_DW_AT(0x80, call_parameter, 5, DWARF) +HANDLE_DW_AT(0x81, call_pc, 5, DWARF) +HANDLE_DW_AT(0x82, call_tail_call, 5, DWARF) +HANDLE_DW_AT(0x83, call_target, 5, DWARF) +HANDLE_DW_AT(0x84, call_target_clobbered, 5, DWARF) +HANDLE_DW_AT(0x85, call_data_location, 5, DWARF) +HANDLE_DW_AT(0x86, call_data_value, 5, DWARF) +HANDLE_DW_AT(0x87, noreturn, 5, DWARF) +HANDLE_DW_AT(0x88, alignment, 5, DWARF) +HANDLE_DW_AT(0x89, export_symbols, 5, DWARF) +HANDLE_DW_AT(0x8a, deleted, 5, DWARF) +HANDLE_DW_AT(0x8b, defaulted, 5, DWARF) +HANDLE_DW_AT(0x8c, loclists_base, 5, DWARF) // Vendor extensions: -HANDLE_DW_AT(0x2002, MIPS_loop_begin) -HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin) -HANDLE_DW_AT(0x2004, MIPS_epilog_begin) -HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor) -HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth) -HANDLE_DW_AT(0x2007, MIPS_linkage_name) -HANDLE_DW_AT(0x2008, MIPS_stride) -HANDLE_DW_AT(0x2009, MIPS_abstract_name) -HANDLE_DW_AT(0x200a, MIPS_clone_origin) -HANDLE_DW_AT(0x200b, MIPS_has_inlines) -HANDLE_DW_AT(0x200c, MIPS_stride_byte) -HANDLE_DW_AT(0x200d, MIPS_stride_elem) -HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype) -HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype) -HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype) +HANDLE_DW_AT(0x2002, MIPS_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin, 0, MIPS) +HANDLE_DW_AT(0x2004, MIPS_epilog_begin, 0, MIPS) +HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor, 0, MIPS) +HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth, 0, MIPS) +HANDLE_DW_AT(0x2007, MIPS_linkage_name, 0, MIPS) +HANDLE_DW_AT(0x2008, MIPS_stride, 0, MIPS) +HANDLE_DW_AT(0x2009, MIPS_abstract_name, 0, MIPS) +HANDLE_DW_AT(0x200a, MIPS_clone_origin, 0, MIPS) +HANDLE_DW_AT(0x200b, MIPS_has_inlines, 0, MIPS) +HANDLE_DW_AT(0x200c, MIPS_stride_byte, 0, MIPS) +HANDLE_DW_AT(0x200d, MIPS_stride_elem, 0, MIPS) +HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype, 0, MIPS) +HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype, 0, MIPS) +HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype, 0, MIPS) // This one appears to have only been implemented by Open64 for // fortran and may conflict with other extensions. -HANDLE_DW_AT(0x2011, MIPS_assumed_size) +HANDLE_DW_AT(0x2011, MIPS_assumed_size, 0, MIPS) // GNU extensions -HANDLE_DW_AT(0x2101, sf_names) -HANDLE_DW_AT(0x2102, src_info) -HANDLE_DW_AT(0x2103, mac_info) -HANDLE_DW_AT(0x2104, src_coords) -HANDLE_DW_AT(0x2105, body_begin) -HANDLE_DW_AT(0x2106, body_end) -HANDLE_DW_AT(0x2107, GNU_vector) -HANDLE_DW_AT(0x2110, GNU_template_name) -HANDLE_DW_AT(0x210f, GNU_odr_signature) -HANDLE_DW_AT(0x2119, GNU_macros) +HANDLE_DW_AT(0x2101, sf_names, 0, GNU) +HANDLE_DW_AT(0x2102, src_info, 0, GNU) +HANDLE_DW_AT(0x2103, mac_info, 0, GNU) +HANDLE_DW_AT(0x2104, src_coords, 0, GNU) +HANDLE_DW_AT(0x2105, body_begin, 0, GNU) +HANDLE_DW_AT(0x2106, body_end, 0, GNU) +HANDLE_DW_AT(0x2107, GNU_vector, 0, GNU) +HANDLE_DW_AT(0x2110, GNU_template_name, 0, GNU) +HANDLE_DW_AT(0x210f, GNU_odr_signature, 0, GNU) +HANDLE_DW_AT(0x2119, GNU_macros, 0, GNU) // Extensions for Fission proposal. 
-HANDLE_DW_AT(0x2130, GNU_dwo_name) -HANDLE_DW_AT(0x2131, GNU_dwo_id) -HANDLE_DW_AT(0x2132, GNU_ranges_base) -HANDLE_DW_AT(0x2133, GNU_addr_base) -HANDLE_DW_AT(0x2134, GNU_pubnames) -HANDLE_DW_AT(0x2135, GNU_pubtypes) -HANDLE_DW_AT(0x2136, GNU_discriminator) +HANDLE_DW_AT(0x2130, GNU_dwo_name, 0, GNU) +HANDLE_DW_AT(0x2131, GNU_dwo_id, 0, GNU) +HANDLE_DW_AT(0x2132, GNU_ranges_base, 0, GNU) +HANDLE_DW_AT(0x2133, GNU_addr_base, 0, GNU) +HANDLE_DW_AT(0x2134, GNU_pubnames, 0, GNU) +HANDLE_DW_AT(0x2135, GNU_pubtypes, 0, GNU) +HANDLE_DW_AT(0x2136, GNU_discriminator, 0, GNU) // Borland extensions. -HANDLE_DW_AT(0x3b11, BORLAND_property_read) -HANDLE_DW_AT(0x3b12, BORLAND_property_write) -HANDLE_DW_AT(0x3b13, BORLAND_property_implements) -HANDLE_DW_AT(0x3b14, BORLAND_property_index) -HANDLE_DW_AT(0x3b15, BORLAND_property_default) -HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit) -HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class) -HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record) -HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass) -HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor) -HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor) -HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method) -HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface) -HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI) -HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return) -HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr) -HANDLE_DW_AT(0x3b31, BORLAND_closure) +HANDLE_DW_AT(0x3b11, BORLAND_property_read, 0, BORLAND) +HANDLE_DW_AT(0x3b12, BORLAND_property_write, 0, BORLAND) +HANDLE_DW_AT(0x3b13, BORLAND_property_implements, 0, BORLAND) +HANDLE_DW_AT(0x3b14, BORLAND_property_index, 0, BORLAND) +HANDLE_DW_AT(0x3b15, BORLAND_property_default, 0, BORLAND) +HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit, 0, BORLAND) +HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class, 0, BORLAND) +HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record, 0, BORLAND) +HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass, 0, BORLAND) +HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor, 0, BORLAND) +HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor, 0, BORLAND) +HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method, 0, BORLAND) +HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface, 0, BORLAND) +HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI, 0, BORLAND) +HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return, 0, BORLAND) +HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr, 0, BORLAND) +HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND) // LLVM project extensions. -HANDLE_DW_AT(0x3e00, LLVM_include_path) -HANDLE_DW_AT(0x3e01, LLVM_config_macros) -HANDLE_DW_AT(0x3e02, LLVM_isysroot) +HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM) +HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM) +HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM) // Apple extensions. 
-HANDLE_DW_AT(0x3fe1, APPLE_optimized) -HANDLE_DW_AT(0x3fe2, APPLE_flags) -HANDLE_DW_AT(0x3fe3, APPLE_isa) -HANDLE_DW_AT(0x3fe4, APPLE_block) -HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers) -HANDLE_DW_AT(0x3fe6, APPLE_runtime_class) -HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr) -HANDLE_DW_AT(0x3fe8, APPLE_property_name) -HANDLE_DW_AT(0x3fe9, APPLE_property_getter) -HANDLE_DW_AT(0x3fea, APPLE_property_setter) -HANDLE_DW_AT(0x3feb, APPLE_property_attribute) -HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type) -HANDLE_DW_AT(0x3fed, APPLE_property) +HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE) +HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE) +HANDLE_DW_AT(0x3fe3, APPLE_isa, 0, APPLE) +HANDLE_DW_AT(0x3fe4, APPLE_block, 0, APPLE) +HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers, 0, APPLE) +HANDLE_DW_AT(0x3fe6, APPLE_runtime_class, 0, APPLE) +HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr, 0, APPLE) +HANDLE_DW_AT(0x3fe8, APPLE_property_name, 0, APPLE) +HANDLE_DW_AT(0x3fe9, APPLE_property_getter, 0, APPLE) +HANDLE_DW_AT(0x3fea, APPLE_property_setter, 0, APPLE) +HANDLE_DW_AT(0x3feb, APPLE_property_attribute, 0, APPLE) +HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type, 0, APPLE) +HANDLE_DW_AT(0x3fed, APPLE_property, 0, APPLE) // Attribute form encodings. -HANDLE_DW_FORM(0x01, addr) -HANDLE_DW_FORM(0x03, block2) -HANDLE_DW_FORM(0x04, block4) -HANDLE_DW_FORM(0x05, data2) -HANDLE_DW_FORM(0x06, data4) -HANDLE_DW_FORM(0x07, data8) -HANDLE_DW_FORM(0x08, string) -HANDLE_DW_FORM(0x09, block) -HANDLE_DW_FORM(0x0a, block1) -HANDLE_DW_FORM(0x0b, data1) -HANDLE_DW_FORM(0x0c, flag) -HANDLE_DW_FORM(0x0d, sdata) -HANDLE_DW_FORM(0x0e, strp) -HANDLE_DW_FORM(0x0f, udata) -HANDLE_DW_FORM(0x10, ref_addr) -HANDLE_DW_FORM(0x11, ref1) -HANDLE_DW_FORM(0x12, ref2) -HANDLE_DW_FORM(0x13, ref4) -HANDLE_DW_FORM(0x14, ref8) -HANDLE_DW_FORM(0x15, ref_udata) -HANDLE_DW_FORM(0x16, indirect) +HANDLE_DW_FORM(0x01, addr, 2, DWARF) +HANDLE_DW_FORM(0x03, block2, 2, DWARF) +HANDLE_DW_FORM(0x04, block4, 2, DWARF) +HANDLE_DW_FORM(0x05, data2, 2, DWARF) +HANDLE_DW_FORM(0x06, data4, 2, DWARF) +HANDLE_DW_FORM(0x07, data8, 2, DWARF) +HANDLE_DW_FORM(0x08, string, 2, DWARF) +HANDLE_DW_FORM(0x09, block, 2, DWARF) +HANDLE_DW_FORM(0x0a, block1, 2, DWARF) +HANDLE_DW_FORM(0x0b, data1, 2, DWARF) +HANDLE_DW_FORM(0x0c, flag, 2, DWARF) +HANDLE_DW_FORM(0x0d, sdata, 2, DWARF) +HANDLE_DW_FORM(0x0e, strp, 2, DWARF) +HANDLE_DW_FORM(0x0f, udata, 2, DWARF) +HANDLE_DW_FORM(0x10, ref_addr, 2, DWARF) +HANDLE_DW_FORM(0x11, ref1, 2, DWARF) +HANDLE_DW_FORM(0x12, ref2, 2, DWARF) +HANDLE_DW_FORM(0x13, ref4, 2, DWARF) +HANDLE_DW_FORM(0x14, ref8, 2, DWARF) +HANDLE_DW_FORM(0x15, ref_udata, 2, DWARF) +HANDLE_DW_FORM(0x16, indirect, 2, DWARF) // New in DWARF v4: -HANDLE_DW_FORM(0x17, sec_offset) -HANDLE_DW_FORM(0x18, exprloc) -HANDLE_DW_FORM(0x19, flag_present) +HANDLE_DW_FORM(0x17, sec_offset, 4, DWARF) +HANDLE_DW_FORM(0x18, exprloc, 4, DWARF) +HANDLE_DW_FORM(0x19, flag_present, 4, DWARF) // This was defined out of sequence. 
-HANDLE_DW_FORM(0x20, ref_sig8) +HANDLE_DW_FORM(0x20, ref_sig8, 4, DWARF) // New in DWARF v5: -HANDLE_DW_FORM(0x1a, strx) -HANDLE_DW_FORM(0x1b, addrx) -HANDLE_DW_FORM(0x1c, ref_sup4) -HANDLE_DW_FORM(0x1d, strp_sup) -HANDLE_DW_FORM(0x1e, data16) -HANDLE_DW_FORM(0x1f, line_strp) -HANDLE_DW_FORM(0x21, implicit_const) -HANDLE_DW_FORM(0x22, loclistx) -HANDLE_DW_FORM(0x23, rnglistx) -HANDLE_DW_FORM(0x24, ref_sup8) -HANDLE_DW_FORM(0x25, strx1) -HANDLE_DW_FORM(0x26, strx2) -HANDLE_DW_FORM(0x27, strx3) -HANDLE_DW_FORM(0x28, strx4) -HANDLE_DW_FORM(0x29, addrx1) -HANDLE_DW_FORM(0x2a, addrx2) -HANDLE_DW_FORM(0x2b, addrx3) -HANDLE_DW_FORM(0x2c, addrx4) +HANDLE_DW_FORM(0x1a, strx, 5, DWARF) +HANDLE_DW_FORM(0x1b, addrx, 5, DWARF) +HANDLE_DW_FORM(0x1c, ref_sup4, 5, DWARF) +HANDLE_DW_FORM(0x1d, strp_sup, 5, DWARF) +HANDLE_DW_FORM(0x1e, data16, 5, DWARF) +HANDLE_DW_FORM(0x1f, line_strp, 5, DWARF) +HANDLE_DW_FORM(0x21, implicit_const, 5, DWARF) +HANDLE_DW_FORM(0x22, loclistx, 5, DWARF) +HANDLE_DW_FORM(0x23, rnglistx, 5, DWARF) +HANDLE_DW_FORM(0x24, ref_sup8, 5, DWARF) +HANDLE_DW_FORM(0x25, strx1, 5, DWARF) +HANDLE_DW_FORM(0x26, strx2, 5, DWARF) +HANDLE_DW_FORM(0x27, strx3, 5, DWARF) +HANDLE_DW_FORM(0x28, strx4, 5, DWARF) +HANDLE_DW_FORM(0x29, addrx1, 5, DWARF) +HANDLE_DW_FORM(0x2a, addrx2, 5, DWARF) +HANDLE_DW_FORM(0x2b, addrx3, 5, DWARF) +HANDLE_DW_FORM(0x2c, addrx4, 5, DWARF) // Extensions for Fission proposal -HANDLE_DW_FORM(0x1f01, GNU_addr_index) -HANDLE_DW_FORM(0x1f02, GNU_str_index) +HANDLE_DW_FORM(0x1f01, GNU_addr_index, 0, GNU) +HANDLE_DW_FORM(0x1f02, GNU_str_index, 0, GNU) // Alternate debug sections proposal (output of "dwz" tool). -HANDLE_DW_FORM(0x1f20, GNU_ref_alt) -HANDLE_DW_FORM(0x1f21, GNU_strp_alt) +HANDLE_DW_FORM(0x1f20, GNU_ref_alt, 0, GNU) +HANDLE_DW_FORM(0x1f21, GNU_strp_alt, 0, GNU) // DWARF Expression operators. 
-HANDLE_DW_OP(0x03, addr) -HANDLE_DW_OP(0x06, deref) -HANDLE_DW_OP(0x08, const1u) -HANDLE_DW_OP(0x09, const1s) -HANDLE_DW_OP(0x0a, const2u) -HANDLE_DW_OP(0x0b, const2s) -HANDLE_DW_OP(0x0c, const4u) -HANDLE_DW_OP(0x0d, const4s) -HANDLE_DW_OP(0x0e, const8u) -HANDLE_DW_OP(0x0f, const8s) -HANDLE_DW_OP(0x10, constu) -HANDLE_DW_OP(0x11, consts) -HANDLE_DW_OP(0x12, dup) -HANDLE_DW_OP(0x13, drop) -HANDLE_DW_OP(0x14, over) -HANDLE_DW_OP(0x15, pick) -HANDLE_DW_OP(0x16, swap) -HANDLE_DW_OP(0x17, rot) -HANDLE_DW_OP(0x18, xderef) -HANDLE_DW_OP(0x19, abs) -HANDLE_DW_OP(0x1a, and) -HANDLE_DW_OP(0x1b, div) -HANDLE_DW_OP(0x1c, minus) -HANDLE_DW_OP(0x1d, mod) -HANDLE_DW_OP(0x1e, mul) -HANDLE_DW_OP(0x1f, neg) -HANDLE_DW_OP(0x20, not) -HANDLE_DW_OP(0x21, or) -HANDLE_DW_OP(0x22, plus) -HANDLE_DW_OP(0x23, plus_uconst) -HANDLE_DW_OP(0x24, shl) -HANDLE_DW_OP(0x25, shr) -HANDLE_DW_OP(0x26, shra) -HANDLE_DW_OP(0x27, xor) -HANDLE_DW_OP(0x28, bra) -HANDLE_DW_OP(0x29, eq) -HANDLE_DW_OP(0x2a, ge) -HANDLE_DW_OP(0x2b, gt) -HANDLE_DW_OP(0x2c, le) -HANDLE_DW_OP(0x2d, lt) -HANDLE_DW_OP(0x2e, ne) -HANDLE_DW_OP(0x2f, skip) -HANDLE_DW_OP(0x30, lit0) -HANDLE_DW_OP(0x31, lit1) -HANDLE_DW_OP(0x32, lit2) -HANDLE_DW_OP(0x33, lit3) -HANDLE_DW_OP(0x34, lit4) -HANDLE_DW_OP(0x35, lit5) -HANDLE_DW_OP(0x36, lit6) -HANDLE_DW_OP(0x37, lit7) -HANDLE_DW_OP(0x38, lit8) -HANDLE_DW_OP(0x39, lit9) -HANDLE_DW_OP(0x3a, lit10) -HANDLE_DW_OP(0x3b, lit11) -HANDLE_DW_OP(0x3c, lit12) -HANDLE_DW_OP(0x3d, lit13) -HANDLE_DW_OP(0x3e, lit14) -HANDLE_DW_OP(0x3f, lit15) -HANDLE_DW_OP(0x40, lit16) -HANDLE_DW_OP(0x41, lit17) -HANDLE_DW_OP(0x42, lit18) -HANDLE_DW_OP(0x43, lit19) -HANDLE_DW_OP(0x44, lit20) -HANDLE_DW_OP(0x45, lit21) -HANDLE_DW_OP(0x46, lit22) -HANDLE_DW_OP(0x47, lit23) -HANDLE_DW_OP(0x48, lit24) -HANDLE_DW_OP(0x49, lit25) -HANDLE_DW_OP(0x4a, lit26) -HANDLE_DW_OP(0x4b, lit27) -HANDLE_DW_OP(0x4c, lit28) -HANDLE_DW_OP(0x4d, lit29) -HANDLE_DW_OP(0x4e, lit30) -HANDLE_DW_OP(0x4f, lit31) -HANDLE_DW_OP(0x50, reg0) -HANDLE_DW_OP(0x51, reg1) -HANDLE_DW_OP(0x52, reg2) -HANDLE_DW_OP(0x53, reg3) -HANDLE_DW_OP(0x54, reg4) -HANDLE_DW_OP(0x55, reg5) -HANDLE_DW_OP(0x56, reg6) -HANDLE_DW_OP(0x57, reg7) -HANDLE_DW_OP(0x58, reg8) -HANDLE_DW_OP(0x59, reg9) -HANDLE_DW_OP(0x5a, reg10) -HANDLE_DW_OP(0x5b, reg11) -HANDLE_DW_OP(0x5c, reg12) -HANDLE_DW_OP(0x5d, reg13) -HANDLE_DW_OP(0x5e, reg14) -HANDLE_DW_OP(0x5f, reg15) -HANDLE_DW_OP(0x60, reg16) -HANDLE_DW_OP(0x61, reg17) -HANDLE_DW_OP(0x62, reg18) -HANDLE_DW_OP(0x63, reg19) -HANDLE_DW_OP(0x64, reg20) -HANDLE_DW_OP(0x65, reg21) -HANDLE_DW_OP(0x66, reg22) -HANDLE_DW_OP(0x67, reg23) -HANDLE_DW_OP(0x68, reg24) -HANDLE_DW_OP(0x69, reg25) -HANDLE_DW_OP(0x6a, reg26) -HANDLE_DW_OP(0x6b, reg27) -HANDLE_DW_OP(0x6c, reg28) -HANDLE_DW_OP(0x6d, reg29) -HANDLE_DW_OP(0x6e, reg30) -HANDLE_DW_OP(0x6f, reg31) -HANDLE_DW_OP(0x70, breg0) -HANDLE_DW_OP(0x71, breg1) -HANDLE_DW_OP(0x72, breg2) -HANDLE_DW_OP(0x73, breg3) -HANDLE_DW_OP(0x74, breg4) -HANDLE_DW_OP(0x75, breg5) -HANDLE_DW_OP(0x76, breg6) -HANDLE_DW_OP(0x77, breg7) -HANDLE_DW_OP(0x78, breg8) -HANDLE_DW_OP(0x79, breg9) -HANDLE_DW_OP(0x7a, breg10) -HANDLE_DW_OP(0x7b, breg11) -HANDLE_DW_OP(0x7c, breg12) -HANDLE_DW_OP(0x7d, breg13) -HANDLE_DW_OP(0x7e, breg14) -HANDLE_DW_OP(0x7f, breg15) -HANDLE_DW_OP(0x80, breg16) -HANDLE_DW_OP(0x81, breg17) -HANDLE_DW_OP(0x82, breg18) -HANDLE_DW_OP(0x83, breg19) -HANDLE_DW_OP(0x84, breg20) -HANDLE_DW_OP(0x85, breg21) -HANDLE_DW_OP(0x86, breg22) -HANDLE_DW_OP(0x87, breg23) -HANDLE_DW_OP(0x88, breg24) -HANDLE_DW_OP(0x89, breg25) -HANDLE_DW_OP(0x8a, 
breg26) -HANDLE_DW_OP(0x8b, breg27) -HANDLE_DW_OP(0x8c, breg28) -HANDLE_DW_OP(0x8d, breg29) -HANDLE_DW_OP(0x8e, breg30) -HANDLE_DW_OP(0x8f, breg31) -HANDLE_DW_OP(0x90, regx) -HANDLE_DW_OP(0x91, fbreg) -HANDLE_DW_OP(0x92, bregx) -HANDLE_DW_OP(0x93, piece) -HANDLE_DW_OP(0x94, deref_size) -HANDLE_DW_OP(0x95, xderef_size) -HANDLE_DW_OP(0x96, nop) +HANDLE_DW_OP(0x03, addr, 2, DWARF) +HANDLE_DW_OP(0x06, deref, 2, DWARF) +HANDLE_DW_OP(0x08, const1u, 2, DWARF) +HANDLE_DW_OP(0x09, const1s, 2, DWARF) +HANDLE_DW_OP(0x0a, const2u, 2, DWARF) +HANDLE_DW_OP(0x0b, const2s, 2, DWARF) +HANDLE_DW_OP(0x0c, const4u, 2, DWARF) +HANDLE_DW_OP(0x0d, const4s, 2, DWARF) +HANDLE_DW_OP(0x0e, const8u, 2, DWARF) +HANDLE_DW_OP(0x0f, const8s, 2, DWARF) +HANDLE_DW_OP(0x10, constu, 2, DWARF) +HANDLE_DW_OP(0x11, consts, 2, DWARF) +HANDLE_DW_OP(0x12, dup, 2, DWARF) +HANDLE_DW_OP(0x13, drop, 2, DWARF) +HANDLE_DW_OP(0x14, over, 2, DWARF) +HANDLE_DW_OP(0x15, pick, 2, DWARF) +HANDLE_DW_OP(0x16, swap, 2, DWARF) +HANDLE_DW_OP(0x17, rot, 2, DWARF) +HANDLE_DW_OP(0x18, xderef, 2, DWARF) +HANDLE_DW_OP(0x19, abs, 2, DWARF) +HANDLE_DW_OP(0x1a, and, 2, DWARF) +HANDLE_DW_OP(0x1b, div, 2, DWARF) +HANDLE_DW_OP(0x1c, minus, 2, DWARF) +HANDLE_DW_OP(0x1d, mod, 2, DWARF) +HANDLE_DW_OP(0x1e, mul, 2, DWARF) +HANDLE_DW_OP(0x1f, neg, 2, DWARF) +HANDLE_DW_OP(0x20, not, 2, DWARF) +HANDLE_DW_OP(0x21, or, 2, DWARF) +HANDLE_DW_OP(0x22, plus, 2, DWARF) +HANDLE_DW_OP(0x23, plus_uconst, 2, DWARF) +HANDLE_DW_OP(0x24, shl, 2, DWARF) +HANDLE_DW_OP(0x25, shr, 2, DWARF) +HANDLE_DW_OP(0x26, shra, 2, DWARF) +HANDLE_DW_OP(0x27, xor, 2, DWARF) +HANDLE_DW_OP(0x28, bra, 2, DWARF) +HANDLE_DW_OP(0x29, eq, 2, DWARF) +HANDLE_DW_OP(0x2a, ge, 2, DWARF) +HANDLE_DW_OP(0x2b, gt, 2, DWARF) +HANDLE_DW_OP(0x2c, le, 2, DWARF) +HANDLE_DW_OP(0x2d, lt, 2, DWARF) +HANDLE_DW_OP(0x2e, ne, 2, DWARF) +HANDLE_DW_OP(0x2f, skip, 2, DWARF) +HANDLE_DW_OP(0x30, lit0, 2, DWARF) +HANDLE_DW_OP(0x31, lit1, 2, DWARF) +HANDLE_DW_OP(0x32, lit2, 2, DWARF) +HANDLE_DW_OP(0x33, lit3, 2, DWARF) +HANDLE_DW_OP(0x34, lit4, 2, DWARF) +HANDLE_DW_OP(0x35, lit5, 2, DWARF) +HANDLE_DW_OP(0x36, lit6, 2, DWARF) +HANDLE_DW_OP(0x37, lit7, 2, DWARF) +HANDLE_DW_OP(0x38, lit8, 2, DWARF) +HANDLE_DW_OP(0x39, lit9, 2, DWARF) +HANDLE_DW_OP(0x3a, lit10, 2, DWARF) +HANDLE_DW_OP(0x3b, lit11, 2, DWARF) +HANDLE_DW_OP(0x3c, lit12, 2, DWARF) +HANDLE_DW_OP(0x3d, lit13, 2, DWARF) +HANDLE_DW_OP(0x3e, lit14, 2, DWARF) +HANDLE_DW_OP(0x3f, lit15, 2, DWARF) +HANDLE_DW_OP(0x40, lit16, 2, DWARF) +HANDLE_DW_OP(0x41, lit17, 2, DWARF) +HANDLE_DW_OP(0x42, lit18, 2, DWARF) +HANDLE_DW_OP(0x43, lit19, 2, DWARF) +HANDLE_DW_OP(0x44, lit20, 2, DWARF) +HANDLE_DW_OP(0x45, lit21, 2, DWARF) +HANDLE_DW_OP(0x46, lit22, 2, DWARF) +HANDLE_DW_OP(0x47, lit23, 2, DWARF) +HANDLE_DW_OP(0x48, lit24, 2, DWARF) +HANDLE_DW_OP(0x49, lit25, 2, DWARF) +HANDLE_DW_OP(0x4a, lit26, 2, DWARF) +HANDLE_DW_OP(0x4b, lit27, 2, DWARF) +HANDLE_DW_OP(0x4c, lit28, 2, DWARF) +HANDLE_DW_OP(0x4d, lit29, 2, DWARF) +HANDLE_DW_OP(0x4e, lit30, 2, DWARF) +HANDLE_DW_OP(0x4f, lit31, 2, DWARF) +HANDLE_DW_OP(0x50, reg0, 2, DWARF) +HANDLE_DW_OP(0x51, reg1, 2, DWARF) +HANDLE_DW_OP(0x52, reg2, 2, DWARF) +HANDLE_DW_OP(0x53, reg3, 2, DWARF) +HANDLE_DW_OP(0x54, reg4, 2, DWARF) +HANDLE_DW_OP(0x55, reg5, 2, DWARF) +HANDLE_DW_OP(0x56, reg6, 2, DWARF) +HANDLE_DW_OP(0x57, reg7, 2, DWARF) +HANDLE_DW_OP(0x58, reg8, 2, DWARF) +HANDLE_DW_OP(0x59, reg9, 2, DWARF) +HANDLE_DW_OP(0x5a, reg10, 2, DWARF) +HANDLE_DW_OP(0x5b, reg11, 2, DWARF) +HANDLE_DW_OP(0x5c, reg12, 2, DWARF) +HANDLE_DW_OP(0x5d, reg13, 2, DWARF) 
+HANDLE_DW_OP(0x5e, reg14, 2, DWARF) +HANDLE_DW_OP(0x5f, reg15, 2, DWARF) +HANDLE_DW_OP(0x60, reg16, 2, DWARF) +HANDLE_DW_OP(0x61, reg17, 2, DWARF) +HANDLE_DW_OP(0x62, reg18, 2, DWARF) +HANDLE_DW_OP(0x63, reg19, 2, DWARF) +HANDLE_DW_OP(0x64, reg20, 2, DWARF) +HANDLE_DW_OP(0x65, reg21, 2, DWARF) +HANDLE_DW_OP(0x66, reg22, 2, DWARF) +HANDLE_DW_OP(0x67, reg23, 2, DWARF) +HANDLE_DW_OP(0x68, reg24, 2, DWARF) +HANDLE_DW_OP(0x69, reg25, 2, DWARF) +HANDLE_DW_OP(0x6a, reg26, 2, DWARF) +HANDLE_DW_OP(0x6b, reg27, 2, DWARF) +HANDLE_DW_OP(0x6c, reg28, 2, DWARF) +HANDLE_DW_OP(0x6d, reg29, 2, DWARF) +HANDLE_DW_OP(0x6e, reg30, 2, DWARF) +HANDLE_DW_OP(0x6f, reg31, 2, DWARF) +HANDLE_DW_OP(0x70, breg0, 2, DWARF) +HANDLE_DW_OP(0x71, breg1, 2, DWARF) +HANDLE_DW_OP(0x72, breg2, 2, DWARF) +HANDLE_DW_OP(0x73, breg3, 2, DWARF) +HANDLE_DW_OP(0x74, breg4, 2, DWARF) +HANDLE_DW_OP(0x75, breg5, 2, DWARF) +HANDLE_DW_OP(0x76, breg6, 2, DWARF) +HANDLE_DW_OP(0x77, breg7, 2, DWARF) +HANDLE_DW_OP(0x78, breg8, 2, DWARF) +HANDLE_DW_OP(0x79, breg9, 2, DWARF) +HANDLE_DW_OP(0x7a, breg10, 2, DWARF) +HANDLE_DW_OP(0x7b, breg11, 2, DWARF) +HANDLE_DW_OP(0x7c, breg12, 2, DWARF) +HANDLE_DW_OP(0x7d, breg13, 2, DWARF) +HANDLE_DW_OP(0x7e, breg14, 2, DWARF) +HANDLE_DW_OP(0x7f, breg15, 2, DWARF) +HANDLE_DW_OP(0x80, breg16, 2, DWARF) +HANDLE_DW_OP(0x81, breg17, 2, DWARF) +HANDLE_DW_OP(0x82, breg18, 2, DWARF) +HANDLE_DW_OP(0x83, breg19, 2, DWARF) +HANDLE_DW_OP(0x84, breg20, 2, DWARF) +HANDLE_DW_OP(0x85, breg21, 2, DWARF) +HANDLE_DW_OP(0x86, breg22, 2, DWARF) +HANDLE_DW_OP(0x87, breg23, 2, DWARF) +HANDLE_DW_OP(0x88, breg24, 2, DWARF) +HANDLE_DW_OP(0x89, breg25, 2, DWARF) +HANDLE_DW_OP(0x8a, breg26, 2, DWARF) +HANDLE_DW_OP(0x8b, breg27, 2, DWARF) +HANDLE_DW_OP(0x8c, breg28, 2, DWARF) +HANDLE_DW_OP(0x8d, breg29, 2, DWARF) +HANDLE_DW_OP(0x8e, breg30, 2, DWARF) +HANDLE_DW_OP(0x8f, breg31, 2, DWARF) +HANDLE_DW_OP(0x90, regx, 2, DWARF) +HANDLE_DW_OP(0x91, fbreg, 2, DWARF) +HANDLE_DW_OP(0x92, bregx, 2, DWARF) +HANDLE_DW_OP(0x93, piece, 2, DWARF) +HANDLE_DW_OP(0x94, deref_size, 2, DWARF) +HANDLE_DW_OP(0x95, xderef_size, 2, DWARF) +HANDLE_DW_OP(0x96, nop, 2, DWARF) // New in DWARF v3: -HANDLE_DW_OP(0x97, push_object_address) -HANDLE_DW_OP(0x98, call2) -HANDLE_DW_OP(0x99, call4) -HANDLE_DW_OP(0x9a, call_ref) -HANDLE_DW_OP(0x9b, form_tls_address) -HANDLE_DW_OP(0x9c, call_frame_cfa) -HANDLE_DW_OP(0x9d, bit_piece) +HANDLE_DW_OP(0x97, push_object_address, 3, DWARF) +HANDLE_DW_OP(0x98, call2, 3, DWARF) +HANDLE_DW_OP(0x99, call4, 3, DWARF) +HANDLE_DW_OP(0x9a, call_ref, 3, DWARF) +HANDLE_DW_OP(0x9b, form_tls_address, 3, DWARF) +HANDLE_DW_OP(0x9c, call_frame_cfa, 3, DWARF) +HANDLE_DW_OP(0x9d, bit_piece, 3, DWARF) // New in DWARF v4: -HANDLE_DW_OP(0x9e, implicit_value) -HANDLE_DW_OP(0x9f, stack_value) +HANDLE_DW_OP(0x9e, implicit_value, 4, DWARF) +HANDLE_DW_OP(0x9f, stack_value, 4, DWARF) // New in DWARF v5: -HANDLE_DW_OP(0xa0, implicit_pointer) -HANDLE_DW_OP(0xa1, addrx) -HANDLE_DW_OP(0xa2, constx) -HANDLE_DW_OP(0xa3, entry_value) -HANDLE_DW_OP(0xa4, const_type) -HANDLE_DW_OP(0xa5, regval_type) -HANDLE_DW_OP(0xa6, deref_type) -HANDLE_DW_OP(0xa7, xderef_type) -HANDLE_DW_OP(0xa8, convert) -HANDLE_DW_OP(0xa9, reinterpret) +HANDLE_DW_OP(0xa0, implicit_pointer, 5, DWARF) +HANDLE_DW_OP(0xa1, addrx, 5, DWARF) +HANDLE_DW_OP(0xa2, constx, 5, DWARF) +HANDLE_DW_OP(0xa3, entry_value, 5, DWARF) +HANDLE_DW_OP(0xa4, const_type, 5, DWARF) +HANDLE_DW_OP(0xa5, regval_type, 5, DWARF) +HANDLE_DW_OP(0xa6, deref_type, 5, DWARF) +HANDLE_DW_OP(0xa7, xderef_type, 5, DWARF) 
+HANDLE_DW_OP(0xa8, convert, 5, DWARF) +HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF) // Vendor extensions: // Extensions for GNU-style thread-local storage. -HANDLE_DW_OP(0xe0, GNU_push_tls_address) +HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU) // Extensions for Fission proposal. -HANDLE_DW_OP(0xfb, GNU_addr_index) -HANDLE_DW_OP(0xfc, GNU_const_index) +HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU) +HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU) // DWARF languages. -HANDLE_DW_LANG(0x0001, C89) -HANDLE_DW_LANG(0x0002, C) -HANDLE_DW_LANG(0x0003, Ada83) -HANDLE_DW_LANG(0x0004, C_plus_plus) -HANDLE_DW_LANG(0x0005, Cobol74) -HANDLE_DW_LANG(0x0006, Cobol85) -HANDLE_DW_LANG(0x0007, Fortran77) -HANDLE_DW_LANG(0x0008, Fortran90) -HANDLE_DW_LANG(0x0009, Pascal83) -HANDLE_DW_LANG(0x000a, Modula2) +HANDLE_DW_LANG(0x0001, C89, 2, DWARF) +HANDLE_DW_LANG(0x0002, C, 2, DWARF) +HANDLE_DW_LANG(0x0003, Ada83, 2, DWARF) +HANDLE_DW_LANG(0x0004, C_plus_plus, 2, DWARF) +HANDLE_DW_LANG(0x0005, Cobol74, 2, DWARF) +HANDLE_DW_LANG(0x0006, Cobol85, 2, DWARF) +HANDLE_DW_LANG(0x0007, Fortran77, 2, DWARF) +HANDLE_DW_LANG(0x0008, Fortran90, 2, DWARF) +HANDLE_DW_LANG(0x0009, Pascal83, 2, DWARF) +HANDLE_DW_LANG(0x000a, Modula2, 2, DWARF) // New in DWARF v3: -HANDLE_DW_LANG(0x000b, Java) -HANDLE_DW_LANG(0x000c, C99) -HANDLE_DW_LANG(0x000d, Ada95) -HANDLE_DW_LANG(0x000e, Fortran95) -HANDLE_DW_LANG(0x000f, PLI) -HANDLE_DW_LANG(0x0010, ObjC) -HANDLE_DW_LANG(0x0011, ObjC_plus_plus) -HANDLE_DW_LANG(0x0012, UPC) -HANDLE_DW_LANG(0x0013, D) +HANDLE_DW_LANG(0x000b, Java, 3, DWARF) +HANDLE_DW_LANG(0x000c, C99, 3, DWARF) +HANDLE_DW_LANG(0x000d, Ada95, 3, DWARF) +HANDLE_DW_LANG(0x000e, Fortran95, 3, DWARF) +HANDLE_DW_LANG(0x000f, PLI, 3, DWARF) +HANDLE_DW_LANG(0x0010, ObjC, 3, DWARF) +HANDLE_DW_LANG(0x0011, ObjC_plus_plus, 3, DWARF) +HANDLE_DW_LANG(0x0012, UPC, 3, DWARF) +HANDLE_DW_LANG(0x0013, D, 3, DWARF) // New in DWARF v4: -HANDLE_DW_LANG(0x0014, Python) +HANDLE_DW_LANG(0x0014, Python, 4, DWARF) // New in DWARF v5: -HANDLE_DW_LANG(0x0015, OpenCL) -HANDLE_DW_LANG(0x0016, Go) -HANDLE_DW_LANG(0x0017, Modula3) -HANDLE_DW_LANG(0x0018, Haskell) -HANDLE_DW_LANG(0x0019, C_plus_plus_03) -HANDLE_DW_LANG(0x001a, C_plus_plus_11) -HANDLE_DW_LANG(0x001b, OCaml) -HANDLE_DW_LANG(0x001c, Rust) -HANDLE_DW_LANG(0x001d, C11) -HANDLE_DW_LANG(0x001e, Swift) -HANDLE_DW_LANG(0x001f, Julia) -HANDLE_DW_LANG(0x0020, Dylan) -HANDLE_DW_LANG(0x0021, C_plus_plus_14) -HANDLE_DW_LANG(0x0022, Fortran03) -HANDLE_DW_LANG(0x0023, Fortran08) -HANDLE_DW_LANG(0x0024, RenderScript) -HANDLE_DW_LANG(0x0025, BLISS) +HANDLE_DW_LANG(0x0015, OpenCL, 5, DWARF) +HANDLE_DW_LANG(0x0016, Go, 5, DWARF) +HANDLE_DW_LANG(0x0017, Modula3, 5, DWARF) +HANDLE_DW_LANG(0x0018, Haskell, 5, DWARF) +HANDLE_DW_LANG(0x0019, C_plus_plus_03, 5, DWARF) +HANDLE_DW_LANG(0x001a, C_plus_plus_11, 5, DWARF) +HANDLE_DW_LANG(0x001b, OCaml, 5, DWARF) +HANDLE_DW_LANG(0x001c, Rust, 5, DWARF) +HANDLE_DW_LANG(0x001d, C11, 5, DWARF) +HANDLE_DW_LANG(0x001e, Swift, 5, DWARF) +HANDLE_DW_LANG(0x001f, Julia, 5, DWARF) +HANDLE_DW_LANG(0x0020, Dylan, 5, DWARF) +HANDLE_DW_LANG(0x0021, C_plus_plus_14, 5, DWARF) +HANDLE_DW_LANG(0x0022, Fortran03, 5, DWARF) +HANDLE_DW_LANG(0x0023, Fortran08, 5, DWARF) +HANDLE_DW_LANG(0x0024, RenderScript, 5, DWARF) +HANDLE_DW_LANG(0x0025, BLISS, 5, DWARF) // Vendor extensions: -HANDLE_DW_LANG(0x8001, Mips_Assembler) -HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript) -HANDLE_DW_LANG(0xb000, BORLAND_Delphi) +HANDLE_DW_LANG(0x8001, Mips_Assembler, 0, MIPS) +HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 
GOOGLE) +HANDLE_DW_LANG(0xb000, BORLAND_Delphi, 0, BORLAND) // DWARF attribute type encodings. -HANDLE_DW_ATE(0x01, address) -HANDLE_DW_ATE(0x02, boolean) -HANDLE_DW_ATE(0x03, complex_float) -HANDLE_DW_ATE(0x04, float) -HANDLE_DW_ATE(0x05, signed) -HANDLE_DW_ATE(0x06, signed_char) -HANDLE_DW_ATE(0x07, unsigned) -HANDLE_DW_ATE(0x08, unsigned_char) +HANDLE_DW_ATE(0x01, address, 2, DWARF) +HANDLE_DW_ATE(0x02, boolean, 2, DWARF) +HANDLE_DW_ATE(0x03, complex_float, 2, DWARF) +HANDLE_DW_ATE(0x04, float, 2, DWARF) +HANDLE_DW_ATE(0x05, signed, 2, DWARF) +HANDLE_DW_ATE(0x06, signed_char, 2, DWARF) +HANDLE_DW_ATE(0x07, unsigned, 2, DWARF) +HANDLE_DW_ATE(0x08, unsigned_char, 2, DWARF) // New in DWARF v3: -HANDLE_DW_ATE(0x09, imaginary_float) -HANDLE_DW_ATE(0x0a, packed_decimal) -HANDLE_DW_ATE(0x0b, numeric_string) -HANDLE_DW_ATE(0x0c, edited) -HANDLE_DW_ATE(0x0d, signed_fixed) -HANDLE_DW_ATE(0x0e, unsigned_fixed) -HANDLE_DW_ATE(0x0f, decimal_float) +HANDLE_DW_ATE(0x09, imaginary_float, 3, DWARF) +HANDLE_DW_ATE(0x0a, packed_decimal, 3, DWARF) +HANDLE_DW_ATE(0x0b, numeric_string, 3, DWARF) +HANDLE_DW_ATE(0x0c, edited, 3, DWARF) +HANDLE_DW_ATE(0x0d, signed_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0e, unsigned_fixed, 3, DWARF) +HANDLE_DW_ATE(0x0f, decimal_float, 3, DWARF) // New in DWARF v4: -HANDLE_DW_ATE(0x10, UTF) +HANDLE_DW_ATE(0x10, UTF, 4, DWARF) // New in DWARF v5: -HANDLE_DW_ATE(0x11, UCS) -HANDLE_DW_ATE(0x12, ASCII) +HANDLE_DW_ATE(0x11, UCS, 5, DWARF) +HANDLE_DW_ATE(0x12, ASCII, 5, DWARF) // DWARF virtuality codes. HANDLE_DW_VIRTUALITY(0x00, none) Index: include/llvm/Support/Recycler.h =================================================================== --- include/llvm/Support/Recycler.h +++ include/llvm/Support/Recycler.h @@ -42,13 +42,16 @@ FreeNode *pop_val() { auto *Val = FreeList; + __asan_unpoison_memory_region(Val, Size); FreeList = FreeList->Next; + __msan_allocated_memory(Val, Size); return Val; } void push(FreeNode *N) { N->Next = FreeList; FreeList = N; + __asan_poison_memory_region(N, Size); } public: Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -230,6 +230,12 @@ return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); } + /// Return the type for frame index, which is determined by + /// the alloca address space specified through the data layout. + MVT getFrameIndexTy(const DataLayout &DL) const { + return getPointerTy(DL, DL.getAllocaAddrSpace()); + } + /// EVT is not used in-tree, but is used by out-of-tree target. /// A documentation for this function would be nice... virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; @@ -2382,30 +2388,39 @@ New = N; return true; } - - /// Check to see if the specified operand of the specified instruction is a - /// constant integer. If so, check to see if there are any bits set in the - /// constant that are not demanded. If so, shrink the constant and return - /// true. - bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded); - - /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This - /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be - /// generalized for targets with other types of implicit widening casts. - bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, - const SDLoc &dl); - - /// Helper for SimplifyDemandedBits that can simplify an operation with - /// multiple uses. 
This function uses TLI.SimplifyDemandedBits to - /// simplify Operand \p OpIdx of \p User and then updated \p User with - /// the simplified version. No other uses of \p OpIdx are updated. - /// If \p User is the only user of \p OpIdx, this function behaves exactly - /// like TLI.SimplifyDemandedBits except that it also updates the DAG by - /// calling DCI.CommitTargetLoweringOpt. - bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, - const APInt &Demanded, DAGCombinerInfo &DCI); }; + /// Check to see if the specified operand of the specified instruction is a + /// constant integer. If so, check to see if there are any bits set in the + /// constant that are not demanded. If so, shrink the constant and return + /// true. + bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + // Target hook to do target-specific const optimization, which is called by + // ShrinkDemandedConstant. This function should return true if the target + // doesn't want ShrinkDemandedConstant to further optimize the constant. + virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + return false; + } + + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This + /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be + /// generalized for targets with other types of implicit widening casts. + bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, + TargetLoweringOpt &TLO) const; + + /// Helper for SimplifyDemandedBits that can simplify an operation with + /// multiple uses. This function simplifies operand \p OpIdx of \p User and + /// then updates \p User with the simplified version. No other uses of + /// \p OpIdx are updated. If \p User is the only user of \p OpIdx, this + /// function behaves exactly like function SimplifyDemandedBits declared + /// below except that it also updates the DAG by calling + /// DCI.CommitTargetLoweringOpt. + bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx, const APInt &Demanded, + DAGCombinerInfo &DCI, TargetLoweringOpt &TLO) const; + /// Look at Op. At this point, we know that only the DemandedMask bits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning Index: include/llvm/Transforms/Scalar/ConstantHoisting.h =================================================================== --- include/llvm/Transforms/Scalar/ConstantHoisting.h +++ include/llvm/Transforms/Scalar/ConstantHoisting.h @@ -36,6 +36,7 @@ #ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H #define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H +#include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PassManager.h" @@ -98,7 +99,7 @@ // Glue for old PM. bool runImpl(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, - BasicBlock &Entry); + BlockFrequencyInfo *BFI, BasicBlock &Entry); void releaseMemory() { ConstantVec.clear(); @@ -112,6 +113,7 @@ const TargetTransformInfo *TTI; DominatorTree *DT; + BlockFrequencyInfo *BFI; BasicBlock *Entry; /// Keeps track of constant candidates found in the function. 
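A note on the TargetLowering.h hunks above: ShrinkDemandedConstant, ShrinkDemandedOp and the multi-use SimplifyDemandedBits helper move off TargetLoweringOpt onto TargetLowering itself, and the new targetShrinkDemandedConstant hook lets a backend stop the generic constant shrinking when the existing immediate is already the cheap one. A minimal sketch of an override under assumed names (MyTargetLowering and isCheapAndImmediate are invented for illustration and are not part of this patch):

// Illustrative only: a hypothetical backend overriding the new hook.
bool MyTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
  // Only AND masks are interesting for this sketch.
  if (Op.getOpcode() != ISD::AND)
    return false;
  auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;
  // If the existing mask already encodes as a cheap immediate, returning
  // true without forming a replacement tells the generic
  // ShrinkDemandedConstant to leave the constant as it is.
  return isCheapAndImmediate(C->getAPIntValue());
}
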
@@ -124,8 +126,8 @@ SmallVector ConstantVec; Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const; - Instruction *findConstantInsertionPoint( - const consthoist::ConstantInfo &ConstInfo) const; + SmallPtrSet + findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const; void collectConstantCandidates(ConstCandMapType &ConstCandMap, Instruction *Inst, unsigned Idx, ConstantInt *ConstInt); Index: include/llvm/Transforms/Utils/CodeExtractor.h =================================================================== --- include/llvm/Transforms/Utils/CodeExtractor.h +++ include/llvm/Transforms/Utils/CodeExtractor.h @@ -65,14 +65,6 @@ /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid. static bool isBlockValidForExtraction(const BasicBlock &BB); - /// \brief Create a code extractor for a single basic block. - /// - /// In this formation, we don't require a dominator tree. The given basic - /// block is set up for extraction. - CodeExtractor(BasicBlock *BB, bool AggregateArgs = false, - BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); - /// \brief Create a code extractor for a sequence of blocks. /// /// Given a sequence of basic blocks where the first block in the sequence @@ -91,14 +83,6 @@ BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr); - /// \brief Create a code extractor for a region node. - /// - /// Behaves just like the generic code sequence constructor, but uses the - /// block sequence of the region node passed in. - CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr); - /// \brief Perform the extraction, returning the new function. /// /// Returns zero when called on a CodeExtractor instance where isEligible Index: lib/Analysis/InstructionSimplify.cpp =================================================================== --- lib/Analysis/InstructionSimplify.cpp +++ lib/Analysis/InstructionSimplify.cpp @@ -568,11 +568,11 @@ match(Op1, m_Not(m_Specific(Op0)))) return Constant::getAllOnesValue(Ty); - // add nsw/nuw (xor Y, signbit), signbit --> Y + // add nsw/nuw (xor Y, signmask), signmask --> Y // The no-wrapping add guarantees that the top bit will be set by the add. // Therefore, the xor must be clearing the already set sign bit of Y. - if ((isNSW || isNUW) && match(Op1, m_SignBit()) && - match(Op0, m_Xor(m_Value(Y), m_SignBit()))) + if ((isNSW || isNUW) && match(Op1, m_SignMask()) && + match(Op0, m_Xor(m_Value(Y), m_SignMask()))) return Y; /// i1 add -> xor. @@ -2819,7 +2819,7 @@ return ConstantInt::getTrue(RHS->getContext()); } } - if (CIVal->isSignBit() && *CI2Val == 1) { + if (CIVal->isSignMask() && *CI2Val == 1) { if (Pred == ICmpInst::ICMP_UGT) return ConstantInt::getFalse(RHS->getContext()); if (Pred == ICmpInst::ICMP_ULE) Index: lib/Analysis/MemorySSAUpdater.cpp =================================================================== --- lib/Analysis/MemorySSAUpdater.cpp +++ lib/Analysis/MemorySSAUpdater.cpp @@ -29,7 +29,7 @@ #define DEBUG_TYPE "memoryssa" using namespace llvm; -namespace llvm { + // This is the marker algorithm from "Simple and Efficient Construction of // Static Single Assignment Form" // The simple, non-marker algorithm places phi nodes at any join @@ -211,8 +211,8 @@ } // Set every incoming edge {BB, MP->getBlock()} of MemoryPhi MP to NewDef. 
-void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, - MemoryAccess *NewDef) { +static void setMemoryPhiValueForBlock(MemoryPhi *MP, const BasicBlock *BB, + MemoryAccess *NewDef) { // Replace any operand with us an incoming block with the new defining // access. int i = MP->getBasicBlockIndex(BB); @@ -415,6 +415,7 @@ } return MA; } + void MemorySSAUpdater::removeMemoryAccess(MemoryAccess *MA) { assert(!MSSA->isLiveOnEntryDef(MA) && "Trying to remove the live on entry def"); @@ -490,5 +491,3 @@ ++InsertPt->getIterator()); return NewAccess; } - -} // namespace llvm Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -4007,9 +4007,9 @@ case Instruction::Xor: if (auto *RHSC = dyn_cast(Op->getOperand(1))) - // If the RHS of the xor is a signbit, then this is just an add. - // Instcombine turns add of signbit into xor as a strength reduction step. - if (RHSC->getValue().isSignBit()) + // If the RHS of the xor is a signmask, then this is just an add. + // Instcombine turns add of signmask into xor as a strength reduction step. + if (RHSC->getValue().isSignMask()) return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1)); return BinaryOp(Op); @@ -5328,12 +5328,28 @@ break; case Instruction::Or: - // Use ValueTracking to check whether this is actually an add. - if (haveNoCommonBitsSet(BO->LHS, BO->RHS, getDataLayout(), &AC, - nullptr, &DT)) { - // There aren't any common bits set, so the add can't wrap. - auto Flags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNSW); - return getAddExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); + // If the RHS of the Or is a constant, we may have something like: + // X*4+1 which got turned into X*4|1. Handle this as an Add so loop + // optimizations will transparently handle this case. + // + // In order for this transformation to be safe, the LHS must be of the + // form X*(2^n) and the Or constant must be less than 2^n. + if (ConstantInt *CI = dyn_cast(BO->RHS)) { + const SCEV *LHS = getSCEV(BO->LHS); + const APInt &CIVal = CI->getValue(); + if (GetMinTrailingZeros(LHS) >= + (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { + // Build a plain add SCEV. + const SCEV *S = getAddExpr(LHS, getSCEV(CI)); + // If the LHS of the add was an addrec and it has no-wrap flags, + // transfer the no-wrap flags, since an or won't introduce a wrap. + if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { + const SCEVAddRecExpr *OldAR = cast(LHS); + const_cast(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); + } + return S; + } } break; @@ -5369,7 +5385,7 @@ // using an add, which is equivalent, and re-apply the zext. APInt Trunc = CI->getValue().trunc(Z0TySize); if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && - Trunc.isSignBit()) + Trunc.isSignMask()) return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), UTy); } Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp +++ lib/Analysis/ValueTracking.cpp @@ -778,7 +778,7 @@ // so this isn't a real bug. On the other hand, the program may have undefined // behavior, or we might have a bug in the compiler. We can't assert/crash, so // clear out the known bits, try to warn the user, and hope for the best. 
- if ((KnownZero & KnownOne) != 0) { + if (KnownZero.intersects(KnownOne)) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -860,7 +860,8 @@ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, Depth + 1, Q); - KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); + KnownZero.setAllBits(); + KnownOne.setAllBits(); for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { // Combine the shifted known input bits only for those shift amounts // compatible with its known constraints. @@ -888,7 +889,7 @@ // return anything we'd like, but we need to make sure the sets of known bits // stay disjoint (it should be better for some other code to actually // propagate the undef than to pick a value here using known bits). - if ((KnownZero & KnownOne) != 0) { + if (KnownZero.intersects(KnownOne)) { KnownZero.clearAllBits(); KnownOne.clearAllBits(); } @@ -1640,9 +1641,9 @@ if (match(V, m_Shl(m_One(), m_Value()))) return true; - // (signbit) >>l X is clearly a power of two if the one is not shifted off the - // bottom. If it is shifted off the bottom then the result is undefined. - if (match(V, m_LShr(m_SignBit(), m_Value()))) + // (signmask) >>l X is clearly a power of two if the one is not shifted off + // the bottom. If it is shifted off the bottom then the result is undefined. + if (match(V, m_LShr(m_SignMask(), m_Value()))) return true; // The remaining tests are all recursive, so bail out if we hit the limit. Index: lib/CodeGen/AsmPrinter/DIE.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DIE.cpp +++ lib/CodeGen/AsmPrinter/DIE.cpp @@ -31,6 +31,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "dwarfdebug" + //===----------------------------------------------------------------------===// // DIEAbbrevData Implementation //===----------------------------------------------------------------------===// @@ -79,15 +81,22 @@ dwarf::AttributeString(AttrData.getAttribute()).data()); // Emit form type. +#ifndef NDEBUG + // Could be an assertion, but this way we can see the failing form code + // easily, which helps track down where it came from. + if (!dwarf::isValidFormForVersion(AttrData.getForm(), + AP->getDwarfVersion())) { + DEBUG(dbgs() << "Invalid form " << format("0x%x", AttrData.getForm()) + << " for DWARF version " << AP->getDwarfVersion() << "\n"); + llvm_unreachable("Invalid form for specified DWARF version"); + } +#endif AP->EmitULEB128(AttrData.getForm(), dwarf::FormEncodingString(AttrData.getForm()).data()); // Emit value for DW_FORM_implicit_const. - if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) { - assert(AP->getDwarfVersion() >= 5 && - "DW_FORM_implicit_const is supported starting from DWARFv5"); + if (AttrData.getForm() == dwarf::DW_FORM_implicit_const) AP->EmitSLEB128(AttrData.getValue()); - } } // Mark end of abbreviation. Index: lib/CodeGen/AsmPrinter/DwarfExpression.h =================================================================== --- lib/CodeGen/AsmPrinter/DwarfExpression.h +++ lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -72,6 +72,8 @@ } /// Determine whether there are any operations left in this expression. operator bool() const { return Start != End; } + DIExpression::expr_op_iterator begin() const { return Start; } + DIExpression::expr_op_iterator end() const { return End; } /// Retrieve the fragment information, if any. 
Optional getFragmentInfo() const { Index: lib/CodeGen/AsmPrinter/DwarfExpression.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -226,8 +226,15 @@ return true; } - // FIXME: // Don't emit locations that cannot be expressed without DW_OP_stack_value. + if (DwarfVersion < 4) + if (std::any_of(ExprCursor.begin(), ExprCursor.end(), + [](DIExpression::ExprOperand Op) -> bool { + return Op.getOp() == dwarf::DW_OP_stack_value; + })) { + DwarfRegs.clear(); + return false; + } assert(DwarfRegs.size() == 1); auto Reg = DwarfRegs[0]; Index: lib/CodeGen/GlobalISel/Legalizer.cpp =================================================================== --- lib/CodeGen/GlobalISel/Legalizer.cpp +++ lib/CodeGen/GlobalISel/Legalizer.cpp @@ -24,6 +24,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include + #define DEBUG_TYPE "legalizer" using namespace llvm; @@ -161,7 +163,7 @@ // convergence for performance reasons. bool Changed = false; MachineBasicBlock::iterator NextMI; - for (auto &MBB : MF) + for (auto &MBB : MF) { for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) { // Get the next Instruction before we try to legalize, because there's a // good chance MI will be deleted. @@ -171,18 +173,21 @@ // and are assumed to be legal. if (!isPreISelGenericOpcode(MI->getOpcode())) continue; + unsigned NumNewInsns = 0; SmallVector WorkList; - Helper.MIRBuilder.recordInsertions( - [&](MachineInstr *MI) { WorkList.push_back(MI); }); + Helper.MIRBuilder.recordInsertions([&](MachineInstr *MI) { + ++NumNewInsns; + WorkList.push_back(MI); + }); WorkList.push_back(&*MI); + bool Changed = false; LegalizerHelper::LegalizeResult Res; unsigned Idx = 0; do { Res = Helper.legalizeInstrStep(*WorkList[Idx]); // Error out if we couldn't legalize this instruction. We may want to - // fall - // back to DAG ISel instead in the future. + // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { Helper.MIRBuilder.stopRecordingInsertions(); if (Res == LegalizerHelper::UnableToLegalize) { @@ -194,10 +199,21 @@ } Changed |= Res == LegalizerHelper::Legalized; ++Idx; + +#ifndef NDEBUG + if (NumNewInsns) { + DEBUG(dbgs() << ".. .. Emitted " << NumNewInsns << " insns\n"); + for (auto I = WorkList.end() - NumNewInsns, E = WorkList.end(); + I != E; ++I) + DEBUG(dbgs() << ".. .. New MI: "; (*I)->print(dbgs())); + NumNewInsns = 0; + } +#endif } while (Idx < WorkList.size()); Helper.MIRBuilder.stopRecordingInsertions(); } + } MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); @@ -207,7 +223,11 @@ // good chance MI will be deleted. NextMI = std::next(MI); - Changed |= combineExtracts(*MI, MRI, TII); + // combineExtracts erases MI. 
+ if (combineExtracts(*MI, MRI, TII)) { + Changed = true; + continue; + } Changed |= combineMerges(*MI, MRI, TII); } } Index: lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -24,7 +24,7 @@ #include -#define DEBUG_TYPE "legalize-mir" +#define DEBUG_TYPE "legalizer" using namespace llvm; @@ -35,24 +35,34 @@ LegalizerHelper::LegalizeResult LegalizerHelper::legalizeInstrStep(MachineInstr &MI) { + DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs())); + auto Action = LI.getAction(MI, MRI); switch (std::get<0>(Action)) { case LegalizerInfo::Legal: + DEBUG(dbgs() << ".. Already legal\n"); return AlreadyLegal; case LegalizerInfo::Libcall: + DEBUG(dbgs() << ".. Convert to libcall\n"); return libcall(MI); case LegalizerInfo::NarrowScalar: + DEBUG(dbgs() << ".. Narrow scalar\n"); return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::WidenScalar: + DEBUG(dbgs() << ".. Widen scalar\n"); return widenScalar(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Lower: + DEBUG(dbgs() << ".. Lower\n"); return lower(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::FewerElements: + DEBUG(dbgs() << ".. Reduce number of elements\n"); return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action)); case LegalizerInfo::Custom: + DEBUG(dbgs() << ".. Custom legalization\n"); return LI.legalizeCustom(MI, MRI, MIRBuilder) ? Legalized : UnableToLegalize; default: + DEBUG(dbgs() << ".. Unable to legalize\n"); return UnableToLegalize; } } Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2146,7 +2146,7 @@ if (N->getFlags()->hasNoUnsignedWrap()) return N0; - if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) { + if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) { // N1 is either 0 or the minimum signed value. If the sub is NSW, then // N1 must be 0 because negating the minimum signed value is undefined. if (N->getFlags()->hasNoSignedWrap()) @@ -8298,11 +8298,11 @@ switch (N0.getOpcode()) { case ISD::AND: FPOpcode = ISD::FABS; - SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(SourceVT.getSizeInBits()); break; case ISD::XOR: FPOpcode = ISD::FNEG; - SignMask = APInt::getSignBit(SourceVT.getSizeInBits()); + SignMask = APInt::getSignMask(SourceVT.getSizeInBits()); break; // TODO: ISD::OR --> ISD::FNABS? 
default: @@ -8413,7 +8413,7 @@ if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { assert(VT.getSizeInBits() == 128); SDValue SignBit = DAG.getConstant( - APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); SDValue FlipBit; if (N0.getOpcode() == ISD::FNEG) { FlipBit = SignBit; @@ -8433,7 +8433,7 @@ AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, NewConv, DAG.getConstant(SignBit, DL, VT)); @@ -8481,7 +8481,7 @@ } if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { - APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2); SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0)); AddToWorklist(Cst.getNode()); SDValue X = DAG.getBitcast(VT, N0.getOperand(1)); @@ -8502,7 +8502,7 @@ AddToWorklist(FlipBits.getNode()); return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); } - APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + APInt SignBit = APInt::getSignMask(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); AddToWorklist(X.getNode()); @@ -10313,11 +10313,11 @@ if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x80... per scalar element // and splat it. - SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x80... - SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL0(N0); Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, @@ -10418,11 +10418,11 @@ if (N0.getValueType().isVector()) { // For a vector, get a mask such as 0x7f... per scalar element // and splat it. - SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits()); + SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits()); SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); } else { // For a scalar, just generate 0x7f... - SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + SignMask = ~APInt::getSignMask(IntVT.getSizeInBits()); } SDLoc DL(N0); Int = DAG.getNode(ISD::AND, DL, IntVT, Int, @@ -16036,7 +16036,7 @@ /// Return true if base is a frame index, which is known not to alias with /// anything but itself. Provides base object and offset as results. -static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, +static bool findBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; @@ -16089,53 +16089,56 @@ return false; // Gather base node and offset information. 
- SDValue Base1, Base2; - int64_t Offset1, Offset2; - const GlobalValue *GV1, *GV2; - const void *CV1, *CV2; - bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + SDValue Base0, Base1; + int64_t Offset0, Offset1; + const GlobalValue *GV0, *GV1; + const void *CV0, *CV1; + bool IsFrameIndex0 = findBaseOffset(Op0->getBasePtr(), + Base0, Offset0, GV0, CV0); + bool IsFrameIndex1 = findBaseOffset(Op1->getBasePtr(), Base1, Offset1, GV1, CV1); - bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), - Base2, Offset2, GV2, CV2); - // If they have a same base address then check to see if they overlap. - if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + // If they have the same base address, then check to see if they overlap. + unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + if (Base0 == Base1 || (GV0 && (GV0 == GV1)) || (CV0 && (CV0 == CV1))) + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); // It is possible for different frame indices to alias each other, mostly // when tail call optimization reuses return address slots for arguments. // To catch this case, look up the actual index of frame indices to compute // the real alias relationship. - if (isFrameIndex1 && isFrameIndex2) { + if (IsFrameIndex0 && IsFrameIndex1) { MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); + Offset0 += MFI.getObjectOffset(cast(Base0)->getIndex()); Offset1 += MFI.getObjectOffset(cast(Base1)->getIndex()); - Offset2 += MFI.getObjectOffset(cast(Base2)->getIndex()); - return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || - (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + return !((Offset0 + NumBytes0) <= Offset1 || + (Offset1 + NumBytes1) <= Offset0); } // Otherwise, if we know what the bases are, and they aren't identical, then // we know they cannot alias. - if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + if ((IsFrameIndex0 || CV0 || GV0) && (IsFrameIndex1 || CV1 || GV1)) return false; // If we know required SrcValue1 and SrcValue2 have relatively large alignment // compared to the size and offset of the access, we may be able to prove they - // do not alias. This check is conservative for now to catch cases created by + // do not alias. This check is conservative for now to catch cases created by // splitting vector types. 
- if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && - (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && - (Op0->getMemoryVT().getSizeInBits() >> 3 == - Op1->getMemoryVT().getSizeInBits() >> 3) && - (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) { - int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); - int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); + int64_t SrcValOffset0 = Op0->getSrcValueOffset(); + int64_t SrcValOffset1 = Op1->getSrcValueOffset(); + unsigned OrigAlignment0 = Op0->getOriginalAlignment(); + unsigned OrigAlignment1 = Op1->getOriginalAlignment(); + if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && + NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; // There is no overlap between these relatively aligned accesses of similar - // size, return no alias. - if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || - (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) + // size. Return no alias. + if ((OffAlign0 + NumBytes0) <= OffAlign1 || + (OffAlign1 + NumBytes1) <= OffAlign0) return false; } @@ -16147,19 +16150,17 @@ CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) UseAA = false; #endif + if (UseAA && Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { // Use alias analysis information. - int64_t MinOffset = std::min(Op0->getSrcValueOffset(), - Op1->getSrcValueOffset()); - int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + - Op0->getSrcValueOffset() - MinOffset; - int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + - Op1->getSrcValueOffset() - MinOffset; + int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); + int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset; AliasResult AAResult = - AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0, UseTBAA ? Op0->getAAInfo() : AAMDNodes()), - MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1, UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == NoAlias) return false; Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1343,7 +1343,7 @@ // Convert to an integer of the same size. 
if (TLI.isTypeLegal(IVT)) { State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); - State.SignMask = APInt::getSignBit(NumBits); + State.SignMask = APInt::getSignMask(NumBits); State.SignBit = NumBits - 1; return; } @@ -2984,7 +2984,7 @@ EVT NVT = Node->getValueType(0); APFloat apf(DAG.EVTToAPFloatSemantics(VT), APInt::getNullValue(VT.getSizeInBits())); - APInt x = APInt::getSignBit(NVT.getSizeInBits()); + APInt x = APInt::getSignMask(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, dl, VT); Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -925,9 +925,9 @@ assert(Op.getValueType().isVector() && "Only applies to vectors!"); unsigned EltWidth = Op.getScalarValueSizeInBits(); EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); - unsigned NumElts = Op.getValueType().getVectorNumElements(); + auto EltCnt = Op.getValueType().getVectorElementCount(); return DAG.getNode(ISD::BITCAST, SDLoc(Op), - EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); + EVT::getVectorVT(*DAG.getContext(), EltNVT, EltCnt), Op); } SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -639,12 +639,15 @@ // If we have operands, deallocate them. removeOperands(N); + NodeAllocator.Deallocate(AllNodes.remove(N)); + // Set the opcode to DELETED_NODE to help catch bugs when node // memory is reallocated. + // FIXME: There are places in SDag that have grown a dependency on the opcode + // value in the released node. + __asan_unpoison_memory_region(&N->NodeType, sizeof(N->NodeType)); N->NodeType = ISD::DELETED_NODE; - NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate // them and forget about that node. DbgInfo->erase(N); @@ -1826,7 +1829,7 @@ std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { @@ -1839,7 +1842,7 @@ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); + return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, @@ -1955,7 +1958,7 @@ /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { unsigned BitWidth = Op.getScalarValueSizeInBits(); - return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); + return MaskedValueIsZero(Op, APInt::getSignMask(BitWidth), Depth); } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use @@ -2344,11 +2347,11 @@ KnownOne.lshrInPlace(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. 
- APInt SignBit = APInt::getSignBit(BitWidth); - SignBit.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. - if (KnownZero.intersects(SignBit)) { + APInt SignMask = APInt::getSignMask(BitWidth); + SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. + if (KnownZero.intersects(SignMask)) { KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (KnownOne.intersects(SignBit)) { + } else if (KnownOne.intersects(SignMask)) { KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } @@ -2361,14 +2364,14 @@ // present in the input. APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); - APInt InSignBit = APInt::getSignBit(EBits); + APInt InSignMask = APInt::getSignMask(EBits); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); // If the sign extended bits are demanded, we know that the sign // bit is demanded. - InSignBit = InSignBit.zext(BitWidth); + InSignMask = InSignMask.zext(BitWidth); if (NewBits.getBoolValue()) - InputDemandedBits |= InSignBit; + InputDemandedBits |= InSignMask; computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); @@ -2377,10 +2380,10 @@ // If the sign bit of the input is known set or clear, then we know the // top bits of the result. - if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + if (KnownZero.intersects(InSignMask)) { // Input sign bit known clear KnownZero |= NewBits; KnownOne &= ~NewBits; - } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + } else if (KnownOne.intersects(InSignMask)) { // Input sign bit known set KnownOne |= NewBits; KnownZero &= ~NewBits; } else { // Input sign bit unknown @@ -2745,7 +2748,7 @@ // a set bit that isn't the sign bit (otherwise it could be INT_MIN). KnownOne2.clearBit(BitWidth - 1); if (KnownOne2.getBoolValue()) { - KnownZero = APInt::getSignBit(BitWidth); + KnownZero = APInt::getSignMask(BitWidth); break; } break; @@ -2874,7 +2877,7 @@ // one bit set. if (Val.getOpcode() == ISD::SRL) { auto *C = dyn_cast(Val.getOperand(0)); - if (C && C->getAPIntValue().isSignBit()) + if (C && C->getAPIntValue().isSignMask()) return true; } Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1151,7 +1151,7 @@ FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) return DAG.getFrameIndex(SI->second, - TLI.getPointerTy(DAG.getDataLayout())); + TLI.getFrameIndexTy(DAG.getDataLayout())); } // If this is an instruction which fast-isel has deferred, select it now. @@ -5617,7 +5617,7 @@ SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = - DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true); unsigned Opcode = (IsStart ? 
ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -6630,7 +6630,7 @@ unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL)); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI)); OpInfo.CallOperand = StackSlot; @@ -7393,7 +7393,7 @@ } else if (FrameIndexSDNode *FI = dyn_cast(OpVal)) { const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout()))); + FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); } else Ops.push_back(OpVal); } @@ -7661,7 +7661,7 @@ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); - DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getFrameIndexTy(DL)); ArgListEntry Entry; Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -342,11 +342,16 @@ /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant( - SDValue Op, const APInt &Demanded) { +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const { + SelectionDAG &DAG = TLO.DAG; SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); + // Do target-specific constant optimization. + if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + return TLO.New.getNode(); + // FIXME: ISD::SELECT, ISD::SELECT_CC switch (Opcode) { default: @@ -367,7 +372,7 @@ EVT VT = Op.getValueType(); SDValue NewC = DAG.getConstant(Demanded & C, DL, VT); SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); - return CombineTo(Op, NewOp); + return TLO.CombineTo(Op, NewOp); } break; @@ -380,15 +385,17 @@ /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. -bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, - unsigned BitWidth, - const APInt &Demanded, - const SDLoc &dl) { +bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, + const APInt &Demanded, + TargetLoweringOpt &TLO) const { assert(Op.getNumOperands() == 2 && "ShrinkDemandedOp only supports binary operators!"); assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + SelectionDAG &DAG = TLO.DAG; + SDLoc dl(Op); + // Early return, as this function cannot handle vector types. if (Op.getValueType().isVector()) return false; @@ -418,23 +425,22 @@ bool NeedZext = DemandedSize > SmallVTBits; SDValue Z = DAG.getNode(NeedZext ? 
ISD::ZERO_EXTEND : ISD::ANY_EXTEND, dl, Op.getValueType(), X); - return CombineTo(Op, Z); + return TLO.CombineTo(Op, Z); } } return false; } bool -TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User, - unsigned OpIdx, - const APInt &Demanded, - DAGCombinerInfo &DCI) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); +TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, + const APInt &Demanded, + DAGCombinerInfo &DCI, + TargetLoweringOpt &TLO) const { SDValue Op = User->getOperand(OpIdx); APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, - *this, 0, true)) + if (!SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, + TLO, 0, true)) return false; @@ -446,9 +452,9 @@ // with the value 'x', which will give us: // Old = i32 and x, 0xffffff // New = x - if (Old.hasOneUse()) { + if (TLO.Old.hasOneUse()) { // For the one use case, we just commit the change. - DCI.CommitTargetLoweringOpt(*this); + DCI.CommitTargetLoweringOpt(TLO); return true; } @@ -456,17 +462,17 @@ // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that // it will attempt to combine will be opt. - assert(Old == Op); + assert(TLO.Old == Op); SmallVector NewOps; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { if (i == OpIdx) { - NewOps.push_back(New); + NewOps.push_back(TLO.New); continue; } NewOps.push_back(User->getOperand(i)); } - DAG.UpdateNodeOperands(User, NewOps); + TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); @@ -585,7 +591,7 @@ // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + if (ShrinkDemandedConstant(Op, ~LHSZero & NewMask, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -620,10 +626,10 @@ if ((NewMask & (KnownZero|KnownZero2)) == NewMask) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + if (ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -654,10 +660,10 @@ if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) return TLO.CombineTo(Op, Op.getOperand(1)); // If the RHS is a constant, see if we can simplify it. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -682,7 +688,7 @@ if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. 
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -727,7 +733,7 @@ } // If it already has all the bits set, nothing to change // but don't shrink either! - } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + } else if (ShrinkDemandedConstant(Op, NewMask, TLO)) { return true; } } @@ -746,7 +752,7 @@ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -764,7 +770,7 @@ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (TLO.ShrinkDemandedConstant(Op, NewMask)) + if (ShrinkDemandedConstant(Op, NewMask, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -778,7 +784,7 @@ // If (1) we only need the sign-bit, (2) the setcc operands are the same // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. - if (NewMask.isSignBit() && Op0.getScalarValueSizeInBits() == BitWidth && + if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -964,7 +970,7 @@ // demand the input sign bit. APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); if (HighBits.intersects(NewMask)) - InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits()); + InDemandedMask |= APInt::getSignMask(VT.getScalarSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, TLO, Depth+1)) @@ -974,11 +980,11 @@ KnownOne.lshrInPlace(ShAmt); // Handle the sign bit, adjusted to where it is now in the mask. - APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + APInt SignMask = APInt::getSignMask(BitWidth).lshr(ShAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. - if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + if (KnownZero.intersects(SignMask) || (HighBits & ~NewMask) == HighBits) { SDNodeFlags Flags; Flags.setExact(cast(Op)->Flags.hasExact()); return TLO.CombineTo(Op, @@ -996,7 +1002,7 @@ Op.getOperand(0), NewSA)); } - if (KnownOne.intersects(SignBit)) + if (KnownOne.intersects(SignMask)) // New bits are known one. 
KnownOne |= HighBits; } @@ -1040,7 +1046,7 @@ return TLO.CombineTo(Op, Op.getOperand(0)); APInt InSignBit = - APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth); + APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, ExVT.getScalarSizeInBits()) & @@ -1250,7 +1256,7 @@ if (!TLO.LegalOperations() && !Op.getValueType().isVector() && !Op.getOperand(0).getValueType().isVector() && - NewMask == APInt::getSignBit(Op.getValueSizeInBits()) && + NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); @@ -1284,7 +1290,7 @@ SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, KnownOne2, TLO, Depth+1) || // See if the operation should be performed at a smaller bit width. - TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) { + ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { const SDNodeFlags *Flags = Op.getNode()->getFlags(); if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we @@ -3356,7 +3362,7 @@ SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); SDValue Bias = DAG.getConstant(127, dl, IntVT); - SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, + SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl, IntVT); SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); Index: lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -9,6 +9,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFFormValue.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -112,10 +113,8 @@ continue; } while (AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) { - unsigned StringOffset = AccelSection.getU32(&DataOffset); - RelocAddrMap::const_iterator Reloc = Relocs.find(DataOffset-4); - if (Reloc != Relocs.end()) - StringOffset += Reloc->second.second; + unsigned StringOffset = + getRelocatedValue(AccelSection, 4, &DataOffset, &Relocs); if (!StringOffset) break; OS << format(" Name: %08x \"%s\"\n", StringOffset, Index: lib/DebugInfo/DWARF/DWARFContext.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFContext.cpp +++ lib/DebugInfo/DWARF/DWARFContext.cpp @@ -56,6 +56,16 @@ typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; +uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, + uint32_t *Off, const RelocAddrMap *Relocs) { + if (!Relocs) + return Data.getUnsigned(Off, Size); + RelocAddrMap::const_iterator AI = Relocs->find(*Off); + if (AI == Relocs->end()) + return Data.getUnsigned(Off, Size); + return Data.getUnsigned(Off, Size) + AI->second.second; +} + static void dumpAccelSection(raw_ostream &OS, StringRef Name, const DWARFSection& Section, StringRef StringSection, bool LittleEndian) { Index: 
lib/DebugInfo/DWARF/DWARFDebugLine.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/SmallString.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLine.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -302,16 +303,9 @@ // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it. This instruction // stores a relocatable value into it instead. - { - // If this address is in our relocation map, apply the relocation. - RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); - if (AI != RMap->end()) { - const std::pair &R = AI->second; - State.Row.Address = - debug_line_data.getAddress(offset_ptr) + R.second; - } else - State.Row.Address = debug_line_data.getAddress(offset_ptr); - } + State.Row.Address = + getRelocatedValue(debug_line_data, debug_line_data.getAddressSize(), + offset_ptr, RMap); break; case DW_LNE_define_file: Index: lib/DebugInfo/DWARF/DWARFDebugLoc.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFDebugLoc.cpp +++ lib/DebugInfo/DWARF/DWARFDebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h" #include "llvm/DebugInfo/DWARF/DWARFRelocMap.h" #include "llvm/Support/Dwarf.h" @@ -48,18 +49,10 @@ // 2.6.2 Location Lists // A location list entry consists of: while (true) { + // A beginning and ending address offsets. Entry E; - RelocAddrMap::const_iterator AI = RelocMap.find(Offset); - // 1. A beginning address offset. ... - E.Begin = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.Begin += AI->second.second; - - AI = RelocMap.find(Offset); - // 2. An ending address offset. ... - E.End = data.getUnsigned(&Offset, AddressSize); - if (AI != RelocMap.end()) - E.End += AI->second.second; + E.Begin = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); + E.End = getRelocatedValue(data, AddressSize, &Offset, &RelocMap); // The end of any given location list is marked by an end of list entry, // which consists of a 0 for the beginning address offset and a 0 for the Index: lib/DebugInfo/DWARF/DWARFFormValue.cpp =================================================================== --- lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -334,11 +334,8 @@ (Form == DW_FORM_addr) ? U->getAddressByteSize() : U->getRefAddrByteSize(); - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - if (AI != U->getRelocMap()->end()) { - Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second; - } else - Value.uval = data.getUnsigned(offset_ptr, AddrSize); + Value.uval = + getRelocatedValue(data, AddrSize, offset_ptr, U->getRelocMap()); break; } case DW_FORM_exprloc: @@ -376,12 +373,8 @@ case DW_FORM_ref_sup4: case DW_FORM_strx4: case DW_FORM_addrx4: { - Value.uval = data.getU32(offset_ptr); - if (!U) - break; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + const RelocAddrMap* RelocMap = U ? 
U->getRelocMap() : nullptr; + Value.uval = getRelocatedValue(data, 4, offset_ptr, RelocMap); break; } case DW_FORM_data8: @@ -411,11 +404,8 @@ case DW_FORM_strp_sup: { if (!U) return false; - RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr); - uint8_t Size = U->getDwarfOffsetByteSize(); - Value.uval = data.getUnsigned(offset_ptr, Size); - if (AI != U->getRelocMap()->end()) - Value.uval += AI->second.second; + Value.uval = getRelocatedValue(data, U->getDwarfOffsetByteSize(), + offset_ptr, U->getRelocMap()); break; } case DW_FORM_flag_present: Index: lib/Fuzzer/CMakeLists.txt =================================================================== --- lib/Fuzzer/CMakeLists.txt +++ lib/Fuzzer/CMakeLists.txt @@ -1,3 +1,18 @@ +include(CheckCXXSourceCompiles) + +if( APPLE ) + CHECK_CXX_SOURCE_COMPILES(" + static thread_local int blah; + int main() { + return 0; + } + " HAS_THREAD_LOCAL) + + if( NOT HAS_THREAD_LOCAL ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Dthread_local=__thread") + endif() +endif() + set(LIBFUZZER_FLAGS_BASE "${CMAKE_CXX_FLAGS}") # Disable the coverage and sanitizer instrumentation for the fuzzer itself. set(CMAKE_CXX_FLAGS "${LIBFUZZER_FLAGS_BASE} -fno-sanitize-coverage=trace-pc-guard,edge,trace-cmp,indirect-calls,8bit-counters -Werror") Index: lib/Fuzzer/FuzzerDefs.h =================================================================== --- lib/Fuzzer/FuzzerDefs.h +++ lib/Fuzzer/FuzzerDefs.h @@ -36,12 +36,20 @@ #error "Support for your platform has not been implemented" #endif +#ifndef __has_attribute +# define __has_attribute(x) 0 +#endif + #define LIBFUZZER_POSIX LIBFUZZER_APPLE || LIBFUZZER_LINUX #ifdef __x86_64 -#define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# if __has_attribute(target) +# define ATTRIBUTE_TARGET_POPCNT __attribute__((target("popcnt"))) +# else +# define ATTRIBUTE_TARGET_POPCNT +# endif #else -#define ATTRIBUTE_TARGET_POPCNT +# define ATTRIBUTE_TARGET_POPCNT #endif Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -1103,35 +1103,34 @@ } if (const ConstantFP *CFP = dyn_cast(CV)) { - if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() || - &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) { + const APFloat &APF = CFP->getValueAPF(); + if (&APF.getSemantics() == &APFloat::IEEEsingle() || + &APF.getSemantics() == &APFloat::IEEEdouble()) { // We would like to output the FP constant value in exponential notation, // but we cannot do this if doing so will lose precision. Check here to // make sure that we only output it in exponential format if we can parse // the value back and get the same value. // bool ignored; - bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble(); - bool isInf = CFP->getValueAPF().isInfinity(); - bool isNaN = CFP->getValueAPF().isNaN(); + bool isDouble = &APF.getSemantics() == &APFloat::IEEEdouble(); + bool isInf = APF.isInfinity(); + bool isNaN = APF.isNaN(); if (!isInf && !isNaN) { - double Val = isDouble ? CFP->getValueAPF().convertToDouble() : - CFP->getValueAPF().convertToFloat(); + double Val = isDouble ? APF.convertToDouble() : APF.convertToFloat(); SmallString<128> StrVal; - raw_svector_ostream(StrVal) << Val; - + APF.toString(StrVal, 6, 0, false); // Check to make sure that the stringized number is not some string like // "Inf" or NaN, that atof will accept, but the lexer will not. Check // that the string matches the "[-+]?[0-9]" regex. 
// - if ((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) { - // Reparse stringized version! - if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { - Out << StrVal; - return; - } + assert(((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + "[-+]?[0-9] regex does not match!"); + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) { + Out << StrVal; + return; } } // Otherwise we could not reparse it to exactly the same value, so we must @@ -1140,7 +1139,7 @@ // x86, so we must not use these types. static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - APFloat apf = CFP->getValueAPF(); + APFloat apf = APF; // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, @@ -1153,27 +1152,27 @@ // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - APInt API = CFP->getValueAPF().bitcastToAPInt(); - if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) { + APInt API = APF.bitcastToAPInt(); + if (&APF.getSemantics() == &APFloat::x87DoubleExtended()) { Out << 'K'; Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, /*Upper=*/true); Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); return; - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) { + } else if (&APF.getSemantics() == &APFloat::IEEEquad()) { Out << 'L'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) { + } else if (&APF.getSemantics() == &APFloat::PPCDoubleDouble()) { Out << 'M'; Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, /*Upper=*/true); Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, /*Upper=*/true); - } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) { + } else if (&APF.getSemantics() == &APFloat::IEEEhalf()) { Out << 'H'; Out << format_hex_no_prefix(API.getZExtValue(), 4, /*Upper=*/true); Index: lib/IR/Attributes.cpp =================================================================== --- lib/IR/Attributes.cpp +++ lib/IR/Attributes.cpp @@ -1043,46 +1043,7 @@ AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, AttributeList Attrs) const { - if (!pImpl) - return AttributeList(); - if (!Attrs.pImpl) return *this; - - // FIXME it is not obvious how this should work for alignment. - // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAttribute(Index, Attribute::Alignment) && - "Attempt to change alignment!"); - - // Add the attribute slots before the one we're trying to add. - SmallVector AttrSet; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); - break; - } - LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); - } - - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. 
- AttrBuilder B(AL, Index); - - for (unsigned I = 0, E = Attrs.pImpl->getNumSlots(); I != E; ++I) - if (Attrs.getSlotIndex(I) == Index) { - B.removeAttributes(Attrs.pImpl->getSlotAttributes(I), Index); - break; - } - - AttrSet.push_back(AttributeList::get(C, Index, B)); - - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); - - return get(C, AttrSet); + return removeAttributes(C, Index, AttrBuilder(Attrs.getAttributes(Index))); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, @@ -1095,31 +1056,30 @@ assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); // Add the attribute slots before the one we're trying to add. - SmallVector AttrSet; + SmallVector AttrSets; uint64_t NumAttrs = pImpl->getNumSlots(); - AttributeList AL; + AttrBuilder B; uint64_t LastIndex = 0; for (unsigned I = 0, E = NumAttrs; I != E; ++I) { if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) AL = getSlotAttributes(LastIndex++); + if (getSlotIndex(I) == Index) + B = AttrBuilder(pImpl->getSlotNode(LastIndex++)); break; } LastIndex = I + 1; - AttrSet.push_back(getSlotAttributes(I)); + AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); } - // Now remove the attribute from the correct slot. There may already be an - // AttributeList there. - AttrBuilder B(AL, Index); + // Remove the attributes from the existing set and add them. B.remove(Attrs); - - AttrSet.push_back(AttributeList::get(C, Index, B)); + if (B.hasAttributes()) + AttrSets.push_back({Index, AttributeSet::get(C, B)}); // Add the remaining attribute slots. for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSet.push_back(getSlotAttributes(I)); + AttrSets.push_back({getSlotIndex(I), pImpl->getSlotNode(I)}); - return get(C, AttrSet); + return get(C, AttrSets); } AttributeList AttributeList::removeAttributes(LLVMContext &C, @@ -1403,18 +1363,7 @@ } AttrBuilder &AttrBuilder::removeAttributes(AttributeList A, uint64_t Index) { - unsigned Slot = ~0U; - for (unsigned I = 0, E = A.getNumSlots(); I != E; ++I) - if (A.getSlotIndex(I) == Index) { - Slot = I; - break; - } - - assert(Slot != ~0U && "Couldn't find index in AttributeList!"); - - for (AttributeList::iterator I = A.begin(Slot), E = A.end(Slot); I != E; - ++I) { - Attribute Attr = *I; + for (Attribute Attr : A.getAttributes(Index)) { if (Attr.isEnumAttribute() || Attr.isIntAttribute()) { removeAttribute(Attr.getKindAsEnum()); } else { Index: lib/IR/DebugInfo.cpp =================================================================== --- lib/IR/DebugInfo.cpp +++ lib/IR/DebugInfo.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm-c/DebugInfo.h" #include "llvm/IR/DebugInfo.h" #include "LLVMContextImpl.h" #include "llvm/ADT/STLExtras.h" @@ -659,3 +660,60 @@ return Val->getZExtValue(); return 0; } + +//===----------------------------------------------------------------------===// +// LLVM C API implementations. +//===----------------------------------------------------------------------===// + +template DIT *unwrapDI(LLVMMetadataRef Ref) { + return (DIT *)(Ref ? 
unwrap(Ref) : nullptr); +} + +uint32_t LLVMDebugMetadataVersion() { + return DEBUG_METADATA_VERSION; +} + +unsigned LLVMGetModuleDebugMetadataVersion(LLVMModuleRef M) { + return getDebugMetadataVersionFromModule(*unwrap(M)); +} + +uint8_t LLVMStripModuleDebugInfo(LLVMModuleRef M) { + return StripDebugInfo(*unwrap(M)); +} + +void LLVMDIBuilderDispose(LLVMDIBuilderRef Builder) { + delete unwrap(Builder); +} + +void LLVMDIBuilderFinalize(LLVMDIBuilderRef Builder) { + unwrap(Builder)->finalize(); +} + +LLVMMetadataRef LLVMDIBuilderCreateCompileUnit( + LLVMDIBuilderRef Builder, LLVMDWARFSourceLanguage Lang, + LLVMMetadataRef FileRef, const char *Producer, uint8_t isOptimized, + const char *Flags, unsigned RuntimeVer, const char *SplitName, + LLVMDWARFEmissionKind Kind, uint64_t DWOId, uint8_t SplitDebugInlining, + uint8_t DebugInfoForProfiling) { + auto *File = unwrapDI(FileRef); + + return wrap(unwrap(Builder)->createCompileUnit( + static_cast(Lang), File, Producer, + isOptimized, Flags, RuntimeVer, SplitName, + static_cast(Kind), DWOId, + SplitDebugInlining, DebugInfoForProfiling)); +} + +LLVMMetadataRef +LLVMDIBuilderCreateFile(LLVMDIBuilderRef Builder, const char *Filename, + const char *Directory) { + return wrap(unwrap(Builder)->createFile(Filename, Directory)); +} + +LLVMMetadataRef +LLVMDIBuilderCreateDebugLocation(LLVMContextRef Ctx, unsigned Line, + unsigned Column, LLVMMetadataRef Scope, + LLVMMetadataRef InlinedAt) { + return wrap(DILocation::get(*unwrap(Ctx), Line, Column, unwrap(Scope), + unwrap(InlinedAt))); +} Index: lib/Support/APFloat.cpp =================================================================== --- lib/Support/APFloat.cpp +++ lib/Support/APFloat.cpp @@ -3393,7 +3393,7 @@ } void IEEEFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, bool TruncateZero) const { switch (category) { case fcInfinity: if (isNegative()) @@ -3407,9 +3407,16 @@ if (isNegative()) Str.push_back('-'); - if (!FormatMaxPadding) - append(Str, "0.0E+0"); - else + if (!FormatMaxPadding) { + if (TruncateZero) + append(Str, "0.0E+0"); + else { + append(Str, "0.0"); + if (FormatPrecision > 1) + Str.append(FormatPrecision - 1, '0'); + append(Str, "e+00"); + } + } else Str.push_back('0'); return; @@ -3543,12 +3550,16 @@ Str.push_back(buffer[NDigits-1]); Str.push_back('.'); - if (NDigits == 1) + if (NDigits == 1 && TruncateZero) Str.push_back('0'); else for (unsigned I = 1; I != NDigits; ++I) Str.push_back(buffer[NDigits-1-I]); - Str.push_back('E'); + // Fill with zeros up to FormatPrecision. + if (!TruncateZero && FormatPrecision > NDigits - 1) + Str.append(FormatPrecision - NDigits + 1, '0'); + // For !TruncateZero we use lower 'e'. + Str.push_back(TruncateZero ? 'E' : 'e'); Str.push_back(exp >= 0 ? '+' : '-'); if (exp < 0) exp = -exp; @@ -3557,6 +3568,9 @@ expbuf.push_back((char) ('0' + (exp % 10))); exp /= 10; } while (exp); + // Exponent always at least two digits if we do not truncate zeros. 
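As a rough illustration of the TruncateZero behavior implemented above (the exact strings are an assumption based on this patch's comments, not copied from a test):

    // With FormatPrecision = 6, FormatMaxPadding = 0:
    //   APFloat(1.5).toString(S, 6, 0, /*TruncateZero=*/true)  -> "1.5E+0"        (legacy form)
    //   APFloat(1.5).toString(S, 6, 0, /*TruncateZero=*/false) -> "1.500000e+00"  (printf-style %e)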
+ if (!TruncateZero && expbuf.size() < 2) + expbuf.push_back('0'); for (unsigned I = 0, E = expbuf.size(); I != E; ++I) Str.push_back(expbuf[E-1-I]); return; @@ -4362,10 +4376,11 @@ void DoubleAPFloat::toString(SmallVectorImpl &Str, unsigned FormatPrecision, - unsigned FormatMaxPadding) const { + unsigned FormatMaxPadding, + bool TruncateZero) const { assert(Semantics == &semPPCDoubleDouble && "Unexpected Semantics"); APFloat(semPPCDoubleDoubleLegacy, bitcastToAPInt()) - .toString(Str, FormatPrecision, FormatMaxPadding); + .toString(Str, FormatPrecision, FormatMaxPadding, TruncateZero); } bool DoubleAPFloat::getExactInverse(APFloat *inv) const { Index: lib/Support/APInt.cpp =================================================================== --- lib/Support/APInt.cpp +++ lib/Support/APInt.cpp @@ -364,44 +364,20 @@ return std::equal(pVal, pVal + getNumWords(), RHS.pVal); } -bool APInt::ult(const APInt& RHS) const { +int APInt::compare(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) - return VAL < RHS.VAL; + return VAL < RHS.VAL ? -1 : VAL > RHS.VAL; - // Get active bit length of both operands - unsigned n1 = getActiveBits(); - unsigned n2 = RHS.getActiveBits(); - - // If magnitude of LHS is less than RHS, return true. - if (n1 < n2) - return true; - - // If magnitude of RHS is greater than LHS, return false. - if (n2 < n1) - return false; - - // If they both fit in a word, just compare the low order word - if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD) - return pVal[0] < RHS.pVal[0]; - - // Otherwise, compare all words - unsigned topWord = whichWord(std::max(n1,n2)-1); - for (int i = topWord; i >= 0; --i) { - if (pVal[i] > RHS.pVal[i]) - return false; - if (pVal[i] < RHS.pVal[i]) - return true; - } - return false; + return tcCompare(pVal, RHS.pVal, getNumWords()); } -bool APInt::slt(const APInt& RHS) const { +int APInt::compareSigned(const APInt& RHS) const { assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison"); if (isSingleWord()) { int64_t lhsSext = SignExtend64(VAL, BitWidth); int64_t rhsSext = SignExtend64(RHS.VAL, BitWidth); - return lhsSext < rhsSext; + return lhsSext < rhsSext ? -1 : lhsSext > rhsSext; } bool lhsNeg = isNegative(); @@ -409,11 +385,11 @@ // If the sign bits don't match, then (LHS < RHS) if LHS is negative if (lhsNeg != rhsNeg) - return lhsNeg; + return lhsNeg ? -1 : 1; // Otherwise we can just use an unsigned comparison, because even negative // numbers compare correctly this way if both have the same signed-ness. - return ult(RHS); + return tcCompare(pVal, RHS.pVal, getNumWords()); } void APInt::setBit(unsigned bitPosition) { @@ -730,6 +706,14 @@ return false; } +bool APInt::isSubsetOfSlowCase(const APInt &RHS) const { + for (unsigned i = 0, e = getNumWords(); i != e; ++i) + if ((pVal[i] & ~RHS.pVal[i]) != 0) + return false; + + return true; +} + APInt APInt::byteSwap() const { assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); if (BitWidth == 16) @@ -2676,10 +2660,8 @@ unsigned parts) { while (parts) { parts--; - if (lhs[parts] == rhs[parts]) - continue; - - return (lhs[parts] > rhs[parts]) ? 1 : -1; + if (lhs[parts] != rhs[parts]) + return (lhs[parts] > rhs[parts]) ? 
1 : -1; } return 0; Index: lib/Support/Dwarf.cpp =================================================================== --- lib/Support/Dwarf.cpp +++ lib/Support/Dwarf.cpp @@ -22,7 +22,7 @@ switch (Tag) { default: return StringRef(); -#define HANDLE_DW_TAG(ID, NAME) \ +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ case DW_TAG_##NAME: \ return "DW_TAG_" #NAME; #include "llvm/Support/Dwarf.def" @@ -31,11 +31,34 @@ unsigned llvm::dwarf::getTag(StringRef TagString) { return StringSwitch(TagString) -#define HANDLE_DW_TAG(ID, NAME) .Case("DW_TAG_" #NAME, DW_TAG_##NAME) +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_TAG_" #NAME, DW_TAG_##NAME) #include "llvm/Support/Dwarf.def" .Default(DW_TAG_invalid); } +unsigned llvm::dwarf::TagVersion(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::TagVendor(dwarf::Tag Tag) { + switch (Tag) { + default: + return 0; +#define HANDLE_DW_TAG(ID, NAME, VERSION, VENDOR) \ + case DW_TAG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { case DW_CHILDREN_no: return "DW_CHILDREN_no"; @@ -48,29 +71,73 @@ switch (Attribute) { default: return StringRef(); -#define HANDLE_DW_AT(ID, NAME) \ - case DW_AT_##NAME: \ +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ return "DW_AT_" #NAME; #include "llvm/Support/Dwarf.def" } } +unsigned llvm::dwarf::AttributeVersion(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeVendor(dwarf::Attribute Attribute) { + switch (Attribute) { + default: + return 0; +#define HANDLE_DW_AT(ID, NAME, VERSION, VENDOR) \ + case DW_AT_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_FORM(ID, NAME) \ - case DW_FORM_##NAME: \ +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ return "DW_FORM_" #NAME; #include "llvm/Support/Dwarf.def" } } +unsigned llvm::dwarf::FormVersion(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::FormVendor(dwarf::Form Form) { + switch (Form) { + default: + return 0; +#define HANDLE_DW_FORM(ID, NAME, VERSION, VENDOR) \ + case DW_FORM_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_OP(ID, NAME) \ +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ case DW_OP_##NAME: \ return "DW_OP_" #NAME; #include "llvm/Support/Dwarf.def" @@ -81,17 +148,40 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) { return StringSwitch(OperationEncodingString) -#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME) +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + .Case("DW_OP_" #NAME, DW_OP_##NAME) #include "llvm/Support/Dwarf.def" .Case("DW_OP_LLVM_fragment", 
DW_OP_LLVM_fragment) .Default(0); } +unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::OperationVendor(dwarf::LocationAtom Op) { + switch (Op) { + default: + return 0; +#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR) \ + case DW_OP_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) { switch (Encoding) { default: return StringRef(); -#define HANDLE_DW_ATE(ID, NAME) \ +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ case DW_ATE_##NAME: \ return "DW_ATE_" #NAME; #include "llvm/Support/Dwarf.def" @@ -100,11 +190,34 @@ unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) { return StringSwitch(EncodingString) -#define HANDLE_DW_ATE(ID, NAME) .Case("DW_ATE_" #NAME, DW_ATE_##NAME) +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + .Case("DW_ATE_" #NAME, DW_ATE_##NAME) #include "llvm/Support/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::AttributeEncodingVersion(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::AttributeEncodingVendor(dwarf::TypeKind ATE) { + switch (ATE) { + default: + return 0; +#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) \ + case DW_ATE_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { case DW_DS_unsigned: return "DW_DS_unsigned"; @@ -169,7 +282,7 @@ switch (Language) { default: return StringRef(); -#define HANDLE_DW_LANG(ID, NAME) \ +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ case DW_LANG_##NAME: \ return "DW_LANG_" #NAME; #include "llvm/Support/Dwarf.def" @@ -178,11 +291,34 @@ unsigned llvm::dwarf::getLanguage(StringRef LanguageString) { return StringSwitch(LanguageString) -#define HANDLE_DW_LANG(ID, NAME) .Case("DW_LANG_" #NAME, DW_LANG_##NAME) +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + .Case("DW_LANG_" #NAME, DW_LANG_##NAME) #include "llvm/Support/Dwarf.def" .Default(0); } +unsigned llvm::dwarf::LanguageVersion(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return VERSION; +#include "llvm/Support/Dwarf.def" + } +} + +unsigned llvm::dwarf::LanguageVendor(dwarf::SourceLanguage Lang) { + switch (Lang) { + default: + return 0; +#define HANDLE_DW_LANG(ID, NAME, VERSION, VENDOR) \ + case DW_LANG_##NAME: \ + return DWARF_VENDOR_##VENDOR; +#include "llvm/Support/Dwarf.def" + } +} + StringRef llvm::dwarf::CaseString(unsigned Case) { switch (Case) { case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; @@ -394,3 +530,12 @@ return StringRef(); } + +bool llvm::dwarf::isValidFormForVersion(Form F, unsigned Version, + bool ExtensionsOk) { + if (FormVendor(F) == DWARF_VENDOR_DWARF) { + unsigned FV = FormVersion(F); + return FV > 0 && FV <= Version; + } + return ExtensionsOk; +} Index: lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp =================================================================== --- lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -942,6 +942,7 @@ AArch64::XZR, NextMBBI); 
case AArch64::CMP_SWAP_128: return expandCMP_SWAP_128(MBB, MBBI, NextMBBI); + } return false; } Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -255,6 +255,9 @@ const SelectionDAG &DAG, unsigned Depth = 0) const override; + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, + TargetLoweringOpt &TLO) const override; + MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; /// Returns true if the target allows unaligned memory accesses of the @@ -508,6 +511,18 @@ const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override; + SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + template SDValue getGOT(NodeTy *N, SelectionDAG &DAG) const; + template + SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG) const; + template SDValue getAddr(NodeTy *N, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -91,6 +91,7 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); +STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); static cl::opt EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, @@ -105,6 +106,12 @@ cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false)); +static cl::opt +EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, + cl::desc("Enable AArch64 logical imm instruction " + "optimization"), + cl::init(true)); + /// Value type used for condition codes. static const MVT MVT_CC = MVT::i32; @@ -787,6 +794,140 @@ return VT.changeVectorElementTypeToInteger(); } +static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, + const APInt &Demanded, + TargetLowering::TargetLoweringOpt &TLO, + unsigned NewOpc) { + uint64_t OldImm = Imm, NewImm, Enc; + uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask; + + // Return if the immediate is already all zeros, all ones, a bimm32 or a + // bimm64. + if (Imm == 0 || Imm == Mask || + AArch64_AM::isLogicalImmediate(Imm & Mask, Size)) + return false; + + unsigned EltSize = Size; + uint64_t DemandedBits = Demanded.getZExtValue(); + + // Clear bits that are not demanded. + Imm &= DemandedBits; + + while (true) { + // The goal here is to set the non-demanded bits in a way that minimizes + // the number of switching between 0 and 1. In order to achieve this goal, + // we set the non-demanded bits to the value of the preceding demanded bits. + // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a + // non-demanded bit), we copy bit0 (1) to the least significant 'x', + // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'. 
+ // The final result is 0b11000011. + uint64_t NonDemandedBits = ~DemandedBits; + uint64_t InvertedImm = ~Imm & DemandedBits; + uint64_t RotatedImm = + ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & + NonDemandedBits; + uint64_t Sum = RotatedImm + NonDemandedBits; + bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1)); + uint64_t Ones = (Sum + Carry) & NonDemandedBits; + NewImm = (Imm | Ones) & Mask; + + // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate + // or all-ones or all-zeros, in which case we can stop searching. Otherwise, + // we halve the element size and continue the search. + if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask))) + break; + + // We cannot shrink the element size any further if it is 2-bits. + if (EltSize == 2) + return false; + + EltSize /= 2; + Mask >>= EltSize; + uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize; + + // Return if there is mismatch in any of the demanded bits of Imm and Hi. + if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0) + return false; + + // Merge the upper and lower halves of Imm and DemandedBits. + Imm |= Hi; + DemandedBits |= DemandedBitsHi; + } + + ++NumOptimizedImms; + + // Replicate the element across the register width. + while (EltSize < Size) { + NewImm |= NewImm << EltSize; + EltSize *= 2; + } + + (void)OldImm; + assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 && + "demanded bits should never be altered"); + assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm"); + + // Create the new constant immediate node. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + // If the new constant immediate is all-zeros or all-ones, let the target + // independent DAG combine optimize this node. + if (NewImm == 0 || NewImm == OrigMask) + return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT)); + + // Otherwise, create a machine node so that target independent DAG combine + // doesn't undo this optimization. + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + SDValue New( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + + return TLO.CombineTo(Op, New); +} + +bool AArch64TargetLowering::targetShrinkDemandedConstant( + SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const { + // Delay this optimization to as late as possible. + if (!TLO.LegalOps) + return false; + + if (!EnableOptimizeLogicalImm) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector()) + return false; + + unsigned Size = VT.getSizeInBits(); + assert((Size == 32 || Size == 64) && + "i32 or i64 is expected after legalization."); + + // Exit early if we demand all bits. + if (Demanded.countPopulation() == Size) + return false; + + unsigned NewOpc; + switch (Op.getOpcode()) { + default: + return false; + case ISD::AND: + NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri; + break; + case ISD::OR: + NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri; + break; + case ISD::XOR: + NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri; + break; + } + ConstantSDNode *C = dyn_cast(Op.getOperand(1)); + if (!C) + return false; + uint64_t Imm = C->getZExtValue(); + return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc); +} + /// computeKnownBitsForTargetNode - Determine which of the bits specified in /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. 
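To make the bit-filling step above easier to follow, here is a rough bit-by-bit reference version of the same idea for a single 8-bit element (illustrative only; the patch itself uses the branch-free rotate/add formulation shown above):

    #include <cstdint>

    // Give every non-demanded bit the value of the nearest demanded bit below
    // it (wrapping around at the top), which is what the rotate/add trick
    // computes without looping over bits.
    static uint8_t fillUndemandedBits(uint8_t Imm, uint8_t Demanded) {
      uint8_t Result = Imm & Demanded;
      bool Prev = false;
      for (int Pass = 0; Pass < 2; ++Pass)   // second pass handles wrap-around
        for (int Bit = 0; Bit < 8; ++Bit) {
          if ((Demanded >> Bit) & 1)
            Prev = (Result >> Bit) & 1;
          else if (Prev)
            Result |= uint8_t(1u << Bit);
        }
      return Result;
    }

    // Example from the comment above: Imm = 0bx10xx0x1 with Demanded = 0b01100101.
    // fillUndemandedBits(0b01000001, 0b01100101) == 0b11000011, and its complement
    // 0b00111100 is a shifted mask, so the value is encodable as a logical immediate.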
@@ -3418,11 +3559,75 @@ // Other Lowering Code //===----------------------------------------------------------------------===// +SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(), + N->getOffset(), Flag); +} + +SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty, + SelectionDAG &DAG, + unsigned Flag) const { + return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag); +} + +// (loadGOT sym) +template +SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT); + // FIXME: Once remat is capable of dealing with instructions with register + // operands, expand this into two nodes instead of using a wrapper node. + return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr); +} + +// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym)) +template +SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG) + const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const unsigned char MO_NC = AArch64II::MO_NC; + return DAG.getNode( + AArch64ISD::WrapperLarge, DL, Ty, + getTargetNode(N, Ty, DAG, AArch64II::MO_G3), + getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC), + getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC)); +} + +// (addlow (adrp %hi(sym)) %lo(sym)) +template +SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const { + DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n"); + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE); + SDValue Lo = getTargetNode(N, Ty, DAG, + AArch64II::MO_PAGEOFF | AArch64II::MO_NC); + SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi); + return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo); +} + SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); - const GlobalAddressSDNode *GN = cast(Op); + GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); @@ -3430,32 +3635,15 @@ assert(cast(Op)->getOffset() == 0 && "unexpected offset in global node"); - // This also catched the large code model case for Darwin. + // This also catches the large code model case for Darwin. if ((OpFlags & AArch64II::MO_GOT) != 0) { - SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags); - // FIXME: Once remat is capable of dealing with instructions with register - // operands, expand this into two nodes instead of using a wrapper node. 
- return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(GN, DAG); } if (getTargetMachine().getCodeModel() == CodeModel::Large) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(GN, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and - // the only correct model on Darwin. - SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, - OpFlags | AArch64II::MO_PAGE); - unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC; - SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(GN, DAG); } } @@ -4232,90 +4420,37 @@ // Jump table entries as PC relative offsets. No additional tweaking // is necessary here. Just get the address of the jump table. JumpTableSDNode *JT = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC), - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(JT, DAG); } - - SDValue Hi = - DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(JT, DAG); } SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { ConstantPoolSDNode *CP = cast(Op); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large) { // Use the GOT for the large code model on iOS. if (Subtarget->isTargetMachO()) { - SDValue GotAddr = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_GOT); - return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr); + return getGOT(CP, DAG); } - - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G3), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G2 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G1 | MO_NC), - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(CP, DAG); } else { - // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on - // ELF, the only valid one on Darwin. 
- SDValue Hi = - DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(), - CP->getOffset(), AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetConstantPool( - CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), - AArch64II::MO_PAGEOFF | AArch64II::MO_NC); - - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(CP, DAG); } } SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - const BlockAddress *BA = cast(Op)->getBlockAddress(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc DL(Op); + BlockAddressSDNode *BA = cast(Op); if (getTargetMachine().getCodeModel() == CodeModel::Large && !Subtarget->isTargetMachO()) { - const unsigned char MO_NC = AArch64II::MO_NC; - return DAG.getNode( - AArch64ISD::WrapperLarge, DL, PtrVT, - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC), - DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC)); + return getAddrLarge(BA, DAG); } else { - SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE); - SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF | - AArch64II::MO_NC); - SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi); - return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo); + return getAddr(BA, DAG); } } Index: lib/Target/AArch64/AArch64InstrAtomics.td =================================================================== --- lib/Target/AArch64/AArch64InstrAtomics.td +++ lib/Target/AArch64/AArch64InstrAtomics.td @@ -14,6 +14,9 @@ //===---------------------------------- // Atomic fences //===---------------------------------- +let AddedComplexity = 15, Size = 0 in +def CompilerBarrier : Pseudo<(outs), (ins i32imm:$ordering), + [(atomic_fence imm:$ordering, 0)]>, Sched<[]>; def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>; def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>; Index: lib/Target/AArch64/AArch64InstructionSelector.cpp =================================================================== --- lib/Target/AArch64/AArch64InstructionSelector.cpp +++ lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -41,12 +41,17 @@ namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "AArch64GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class AArch64InstructionSelector : public InstructionSelector { public: AArch64InstructionSelector(const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -70,6 +75,12 @@ const AArch64InstrInfo &TII; const AArch64RegisterInfo &TRI; const AArch64RegisterBankInfo &RBI; + bool ForCodeSize; + + PredicateBitset AvailableFeatures; + PredicateBitset + computeAvailableFeatures(const MachineFunction *MF, + const AArch64Subtarget *Subtarget) const; // We declare the temporaries used by selectImpl() in the class to minimize the // cost of constructing placeholder values. 
@@ -88,7 +99,7 @@ const AArch64TargetMachine &TM, const AArch64Subtarget &STI, const AArch64RegisterBankInfo &RBI) : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + TRI(*STI.getRegisterInfo()), RBI(RBI), ForCodeSize(), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "AArch64GenGlobalISel.inc" #undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -567,6 +578,12 @@ return true; } +void AArch64InstructionSelector::beginFunction( + const MachineFunction &MF) { + ForCodeSize = MF.getFunction()->optForSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool AArch64InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); Index: lib/Target/AArch64/AArch64SchedFalkor.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkor.td +++ lib/Target/AArch64/AArch64SchedFalkor.td @@ -79,14 +79,14 @@ def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 3; } +def : WriteRes + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes { let Latency = 5; } +def : WriteRes { let Latency = 1; } def : WriteRes { let Latency = 5; } -def : WriteRes - { let Latency = 4; let NumMicroOps = 3; } +def : WriteRes + { let Latency = 0; let NumMicroOps = 2; } def : WriteRes { let Latency = 3; let NumMicroOps = 2; } def : WriteRes { let Latency = 2; } Index: lib/Target/AArch64/AArch64SchedFalkorDetails.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -265,6 +265,12 @@ // Arithmetic and Logical Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>; // SIMD Miscellaneous Instructions @@ -320,6 +326,10 @@ // SIMD Store Instructions // ----------------------------------------------------------------------------- +def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>; +def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>; + def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; @@ -415,6 +425,7 @@ def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>; + // FP Miscellaneous 
Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; @@ -427,7 +438,6 @@ def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; - // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; @@ -455,6 +465,7 @@ def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; + // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; @@ -496,28 +507,30 @@ def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; -def : InstRW<[WriteST], (instregex "^LDC.*$")>; -def : InstRW<[WriteST], (instregex "^STLR(B|H|W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXP(W|X)$")>; -def : InstRW<[WriteST], (instregex "^STXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STLR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXP(W|X)$")>; +def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXP(W|X)$")>; -def : InstRW<[WriteSTX], (instregex "^STLXR(B|H|W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; +def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>; +def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>; +def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>; +def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>; def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQ(i|post|pre)$")>; +def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>; +def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>; Index: lib/Target/AArch64/AArch64SchedFalkorWriteRes.td =================================================================== --- lib/Target/AArch64/AArch64SchedFalkorWriteRes.td +++ lib/Target/AArch64/AArch64SchedFalkorWriteRes.td @@ -28,7 +28,6 @@ //===----------------------------------------------------------------------===// // Define 1 micro-op types - def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } def FalkorWr_1X_4cyc : 
SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } def FalkorWr_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } @@ -175,18 +174,33 @@ //===----------------------------------------------------------------------===// // Define 3 micro-op types +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 5; let NumMicroOps = 3; } + def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { let Latency = 6; let NumMicroOps = 3; @@ -196,10 +210,12 @@ let Latency = 4; let NumMicroOps = 3; } + def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; let NumMicroOps = 3; } + def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, FalkorUnitLD]> { let Latency = 3; @@ -259,6 +275,12 @@ let NumMicroOps = 4; } +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + //===----------------------------------------------------------------------===// // Define 5 micro-op types @@ -289,6 +311,13 @@ let NumMicroOps = 6; } +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + //===----------------------------------------------------------------------===// // Define 8 micro-op types Index: lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp =================================================================== --- lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++ lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCRegisterInfo.h" @@ -275,6 +276,12 @@ } } + if (Opcode == AArch64::CompilerBarrier) { + O << '\t' << MAI.getCommentString() << " COMPILER BARRIER"; + printAnnotation(O, Annot); + return; + } + if (!printAliasInstr(MI, STI, O)) printInstruction(MI, STI, O); Index: lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp =================================================================== --- lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -565,6 +565,9 @@ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call); Fixups.push_back(MCFixup::create(0, MI.getOperand(0).getExpr(), Fixup)); return; + } else if (MI.getOpcode() == AArch64::CompilerBarrier) { + // This just prevents the compiler from reordering accesses, no actual code. 
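For context on the CompilerBarrier pseudo handled here and declared in AArch64InstrAtomics.td above: it targets a single-thread fence, which must stop compiler reordering but needs no DMB. A hedged C++ illustration (not part of the patch):

    #include <atomic>

    void publish(int *Slot, int Val) {
      *Slot = Val;
      // A signal fence orders only compiler-visible accesses; with this change it
      // should select to the zero-size CompilerBarrier pseudo, which prints as an
      // assembly comment and emits no bytes.
      std::atomic_signal_fence(std::memory_order_seq_cst);
    }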
+ return; } uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2315,12 +2315,13 @@ SelectionDAG &DAG = DCI.DAG; SDValue Op = Node24->getOperand(OpIdx); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = Op.getValueType(); APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, true, true); - if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI)) + if (TLI.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI, TLO)) return true; return false; @@ -3361,7 +3362,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) || + if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -4696,7 +4696,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Src, Demanded) || + if (TLI.ShrinkDemandedConstant(Src, Demanded, TLO) || TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { DCI.CommitTargetLoweringOpt(TLO); } Index: lib/Target/ARM/ARMCallLowering.cpp =================================================================== --- lib/Target/ARM/ARMCallLowering.cpp +++ lib/Target/ARM/ARMCallLowering.cpp @@ -35,7 +35,8 @@ static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { EVT VT = TLI.getValueType(DL, T, true); - if (!VT.isSimple() || VT.isVector()) + if (!VT.isSimple() || VT.isVector() || + !(VT.isInteger() || VT.isFloatingPoint())) return false; unsigned VTSize = VT.getSimpleVT().getSizeInBits(); Index: lib/Target/ARM/ARMConstantIslandPass.cpp =================================================================== --- lib/Target/ARM/ARMConstantIslandPass.cpp +++ lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1741,10 +1741,9 @@ .add(MI->getOperand(1)); MI->eraseFromParent(); MadeChange = true; - } - if (MI->getOpcode() == ARM::tPUSH && - MI->getOperand(2).getReg() == ARM::LR && - MI->getNumExplicitOperands() == 3) { + } else if (MI->getOpcode() == ARM::tPUSH && + MI->getOperand(2).getReg() == ARM::LR && + MI->getNumExplicitOperands() == 3) { // Just remove the push. MI->eraseFromParent(); MadeChange = true; @@ -2158,6 +2157,15 @@ // If we're in PIC mode, there should be another ADD following. 
auto *TRI = STI->getRegisterInfo(); + + // %base cannot be redefined after the load as it will appear before + // TBB/TBH like: + // %base = + // %base = + // tBB %base, %idx + if (registerDefinedBetween(BaseReg, Load->getNextNode(), MBB->end(), TRI)) + continue; + if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || Index: lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- lib/Target/ARM/ARMISelDAGToDAG.cpp +++ lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -539,11 +539,11 @@ SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { HandleSDNode Handle(N); + SDLoc Loc(N); replaceDAGValue(N.getOperand(1), NewMulConst); BaseReg = Handle.getValue(); - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, - PowerOfTwo), - SDLoc(N), MVT::i32); + Opc = CurDAG->getTargetConstant( + ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); return true; } } @@ -1854,6 +1854,14 @@ return Opc; // If not one we handle, return it unchanged. } +/// Returns true if the given increment is a Constant known to be equal to the +/// access size performed by a NEON load/store. This means the "[rN]!" form can +/// be used. +static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { + auto C = dyn_cast(Inc); + return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; +} + void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *DOpcodes, const uint16_t *QOpcodes0, @@ -1921,13 +1929,13 @@ SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if ((NumVecs <= 2) && !isa(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if ((NumVecs <= 2) && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if ((NumVecs > 2 && !isVLDfixed(Opc)) || - !isa(Inc.getNode())) - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + if ((NumVecs > 2 && !isVLDfixed(Opc)) || !IsImmUpdate) + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2075,11 +2083,12 @@ SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - if (NumVecs <= 2 && !isa(Inc.getNode())) + bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so // check for that explicitly too. Horribly hacky, but temporary. - if (!isa(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); else if (NumVecs > 2 && !isVSTfixed(Opc)) Ops.push_back(Reg0); @@ -2209,7 +2218,9 @@ Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + Ops.push_back(IsImmUpdate ? Reg0 : Inc); } SDValue SuperReg; @@ -2313,9 +2324,11 @@ // fixed-stride update instructions don't have an explicit writeback // operand. It's implicit in the opcode itself. 
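As a worked instance of the isPerfectIncrement check introduced above (the concrete instruction and register choices are illustrative, not taken from a test in this patch):

    // VLD2 of two v4i32 vectors: VecTy.getSizeInBits() / 8 * NumVecs
    //                             = 128 / 8 * 2 = 32 bytes accessed.
    // Only a constant increment of exactly 32 can use the post-increment form
    //   vld2.32 {d0, d1, d2, d3}, [r0]!
    // while any other increment keeps the register-update form
    //   vld2.32 {d0, d1, d2, d3}, [r0], r1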
SDValue Inc = N->getOperand(2); - if (NumVecs <= 2 && !isa(Inc.getNode())) + bool IsImmUpdate = + isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); + if (NumVecs <= 2 && !IsImmUpdate) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (!isa(Inc.getNode())) + if (!IsImmUpdate) Ops.push_back(Inc); // FIXME: VLD3 and VLD4 haven't been updated to that form yet. else if (NumVecs > 2) Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -3358,8 +3358,12 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { - // FIXME: handle "fence singlethread" more efficiently. SDLoc dl(Op); + ConstantSDNode *ScopeN = cast(Op.getOperand(2)); + auto Scope = static_cast(ScopeN->getZExtValue()); + if (Scope == SynchronizationScope::SingleThread) + return Op; + if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get @@ -9476,8 +9480,11 @@ return SDValue(); } - // Don't generate vpaddl+vmovn; we'll match it to vpadd later. - if (Vec.getValueType().getVectorElementType() == VT.getVectorElementType()) + // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also don't try + // to handle an i8 -> i32 situation (or similar). vpaddl can only double the + // size. + if (2 * Vec.getValueType().getVectorElementType().getSizeInBits() != + VT.getVectorElementType().getSizeInBits()) return SDValue(); // Create VPADDL node. @@ -10873,11 +10880,8 @@ // If the increment is a constant, it must match the memory ref size. SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); - if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { - uint64_t IncVal = CInc->getZExtValue(); - if (IncVal != NumBytes) - continue; - } else if (NumBytes >= 3 * 16) { + ConstantSDNode *CInc = dyn_cast(Inc.getNode()); + if (NumBytes >= 3 * 16 && (!CInc || CInc->getZExtValue() != NumBytes)) { // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two // separate instructions that make it harder to use a non-constant update. continue; Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -5975,3 +5975,10 @@ (ins GPR:$addr, GPRPair:$desired, GPRPair:$new), NoItinerary, []>, Sched<[]>; } + +def CompilerBarrier : PseudoInst<(outs), (ins i32imm:$ordering), NoItinerary, + [(atomic_fence imm:$ordering, 0)]> { + let hasSideEffects = 1; + let Size = 0; + let AsmString = "@ COMPILER BARRIER"; +} Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== --- lib/Target/ARM/ARMInstrThumb.td +++ lib/Target/ARM/ARMInstrThumb.td @@ -953,7 +953,7 @@ /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. let hasPostISelHook = 1, Defs = [CPSR] in { - let isCommutable = 1 in + let isCommutable = 1, Uses = [CPSR] in def tADCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMadde tGPR:$Rn, tGPR:$Rm, @@ -1292,6 +1292,7 @@ /// These opcodes will be converted to the real non-S opcodes by /// AdjustInstrPostInstrSelection after giving then an optional CPSR operand. 
let hasPostISelHook = 1, Defs = [CPSR] in { + let Uses = [CPSR] in def tSBCS : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), 2, IIC_iALUr, [(set tGPR:$Rdn, CPSR, (ARMsube tGPR:$Rn, tGPR:$Rm, Index: lib/Target/ARM/ARMInstructionSelector.cpp =================================================================== --- lib/Target/ARM/ARMInstructionSelector.cpp +++ lib/Target/ARM/ARMInstructionSelector.cpp @@ -47,12 +47,9 @@ unsigned SrcReg = I.getOperand(1).getReg(); const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); (void)SrcSize; - assert((DstSize == SrcSize || - // Copies are a means to setup initial types, the number of - // bits may not exactly match. - (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - DstSize <= SrcSize)) && - "Copy with different width?!"); + // We use copies for trunc, so it's ok for the size of the destination to be + // smaller (the higher bits will just be undefined). + assert(DstSize <= SrcSize && "Copy with different width?!"); assert((RegBank->getID() == ARM::GPRRegBankID || RegBank->getID() == ARM::FPRRegBankID) && @@ -294,6 +291,28 @@ } break; } + case G_TRUNC: { + // The high bits are undefined, so there's nothing special to do, just + // treat it as a copy. + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() << "G_TRUNC operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_TRUNC on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_ADD: case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); Index: lib/Target/ARM/ARMRegisterBankInfo.cpp =================================================================== --- lib/Target/ARM/ARMRegisterBankInfo.cpp +++ lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -223,6 +223,7 @@ case G_MUL: case G_SEXT: case G_ZEXT: + case G_TRUNC: case G_GEP: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. Index: lib/Target/Mips/Relocation.txt =================================================================== --- /dev/null +++ lib/Target/Mips/Relocation.txt @@ -0,0 +1,125 @@ +MIPS Relocation Principles + +In LLVM, there are several elements of the llvm::ISD::NodeType enum +that deal with addresses and/or relocations. These are defined in +include/llvm/Target/TargetSelectionDAG.td, namely: + GlobalAddress, GlobalTLSAddress, JumpTable, ConstantPool, + ExternalSymbol, BlockAddress +The MIPS backend uses several principles to handle these. + +1. Code for lowering addresses references to machine dependent code is +factored into common code for generating different address forms and +is called by the relocation model specific lowering function, using +templated functions. For example: + + // lib/Target/Mips/MipsISelLowering.cpp + SDValue MipsTargetLowering:: + lowerJumpTable(SDValue Op, SelectionDAG &DAG) const + +calls + + template // lib/Target/Mips/MipsISelLowering.h + SDValue getAddrLocal(NodeTy *N, const SDLoc &DL, EVT Ty, + SelectionDAG &DAG, bool IsN32OrN64) const + +which calls the overloaded function: + + // lib/Target/Mips/MipsISelLowering.h + SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG, + unsigned Flag) const; + +2. 
Generic address nodes are lowered to some combination of target +independent and machine specific SDNodes (for example: +MipsISD::{Highest, Higher, Hi, Lo}) depending upon relocation model, +ABI, and compilation options. + +The choice of specific instructions that are to be used is delegated +to ISel which in turn relies on TableGen patterns to choose subtarget +specific instructions. For example, in getAddrLocal, the pseudo-code +generated is: + + (add (load (wrapper $gp, %got(sym)), %lo(sym)) + +where "%lo" represents an instance of an SDNode with opcode +"MipsISD::Lo", "wrapper" indicates one with opcode "MipsISD::Wrapper", +and "%got" the global table pointer "getGlobalReg(...)". The "add" is +"ISD::ADD", not a target dependent one. + +3. A TableGen multiclass pattern "MipsHiLoRelocs" is used to define a +template pattern parameterized over the load upper immediate +instruction, add operation, the zero register, and register class. +Here the instantiation of MipsHiLoRelocs in MipsInstrInfo.td is used +to MIPS32 to compute addresses for the static relocation model. + + // lib/Target/Mips/MipsInstrInfo.td + multiclass MipsHiLoRelocs { + def : MipsPat<(MipsHi tglobaladdr:$in), (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsLo tglobaladdr:$in), (Addiu ZeroReg, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPROpnd:$hi, (MipsLo tglobaladdr:$lo)), + (Addiu GPROpnd:$hi, tglobaladdr:$lo)>; + ... + } + defm : MipsHiLoRelocs; + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHiLoRelocs, SYM_32; + +The instantiation in Mips64InstrInfo.td is used for MIPS64 in ILP32 +mode, as guarded by the predicate "SYM_32" and also for a submode of +LP64 where symbols are assumed to be 32 bits wide. A similar +multiclass for MIPS64 in LP64 mode is also defined: + + // lib/Target/Mips/Mips64InstrInfo.td + multiclass MipsHighestHigherHiLoRelocs { + ... + def : MipsPat<(MipsHighest (i64 tglobaladdr:$in)), + (Lui tglobaladdr:$in)>; + ... + def : MipsPat<(MipsHigher (i64 tglobaladdr:$in)), + (Daddiu ZERO_64, tglobaladdr:$in)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHigher (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsHi (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + ... + def : MipsPat<(add GPR64:$hi, (MipsLo (i64 tglobaladdr:$lo))), + (Daddiu GPR64:$hi, tglobaladdr:$lo)>; + } + +and it is instantiated twice: + + // lib/Target/Mips/Mips64InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64; + // lib/Target/Mips/MicroMips64r6InstrInfo.td + defm : MipsHighestHigherHiLoRelocs, SYM_64, + ISA_MICROMIPS64R6; + +These patterns are used during instruction selection to match +MipsISD::{Highest, Higher, Hi, Lo} to a specific machine instruction +and operands. + +More details on how multiclasses in TableGen work can be found in the +section "Multiclass definitions and instances" in the document +"TableGen Language Introduction" + +4. Instruction definitions are multiply defined to cover the different +register classes. In some cases, such as LW/LW64, this also accounts +for the difference in the results of instruction execution. On MIPS32, +"lw" loads a 32 bit value from memory. On MIPS64, "lw" loads a 32 bit +value from memory and sign extends the value to 64 bits. 
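+
+As an illustration only (a simplified, hypothetical sketch -- the real
+definitions also supply instruction formats, scheduling information and
+predicates), the two loads are declared over different register classes
+roughly like this:
+
+  def LW   : Load<"lw", GPR32Opnd>;    // MIPS32: 32-bit result
+  def LW64 : Load<"lw", GPR64Opnd>;    // MIPS64: result sign extended to 64 bits
+
+The LUi/LUi64 pair that follows is the form actually used in the tree: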
+ + // lib/Target/Mips/MipsInstrInfo.td + def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM; + // lib/Target/Mips/Mips64InstrInfo.td + def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64_relaxed>, LUI_FM; + +defines two names "LUi" and "LUi64" with two different register +classes, but with the same encoding---"LUI_FM". These instructions load a +16-bit immediate into bits 31-16 and clear the lower 15 bits. On MIPS64, +the result is sign-extended to 64 bits. Index: lib/Target/PowerPC/PPCISelDAGToDAG.cpp =================================================================== --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -2977,10 +2977,10 @@ SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) { SDValue Chain = LD->getChain(); SDValue Ops[] = { Base, Offset, Chain }; - SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, - N->getValueType(0), Ops); MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); MemOp[0] = LD->getMemOperand(); + SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX, + N->getValueType(0), Ops); cast(NewN)->setMemRefs(MemOp, MemOp + 1); return; } Index: lib/Target/WebAssembly/known_gcc_test_failures.txt =================================================================== --- lib/Target/WebAssembly/known_gcc_test_failures.txt +++ lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -1,5 +1,15 @@ # Tests which are known to fail from the GCC torture test suite. +# Syntax: Each line has a single test to be marked as a 'known failure' (or +# 'exclusion'. Known failures are expected to fail, and will cause an error if +# they pass. (Known failures that do not run at all will not cause an +# error). The format is +# # comment +# +# The attributes in this case represent the different arguments used to +# compiler: 'wasm-s' is for compiling to .s files, and 'wasm-o' for compiling +# to wasm object files (.o). + # Computed gotos are not supported (Cannot select BlockAddress/BRIND) 20040302-1.c 20071210-1.c @@ -66,3 +76,21 @@ 920728-1.c pr28865.c widechar-2.c + +# crash: Running pass 'WebAssembly Explicit Locals' on function +20020107-1.c wasm-o +20030222-1.c wasm-o +20071220-1.c wasm-o +20071220-2.c wasm-o +990130-1.c wasm-o +pr38533.c wasm-o +pr41239.c wasm-o +pr43385.c wasm-o +pr43560.c wasm-o +pr45695.c wasm-o +pr49279.c wasm-o +pr49390.c wasm-o +pr52286.c wasm-o + +# fatal error: error in backend: data symbols must have a size set with .size +921110-1.c wasm-o Index: lib/Target/X86/X86.h =================================================================== --- lib/Target/X86/X86.h +++ lib/Target/X86/X86.h @@ -95,7 +95,8 @@ /// encoding when possible in order to reduce code size. FunctionPass *createX86EvexToVexInsts(); -InstructionSelector *createX86InstructionSelector(X86Subtarget &, +InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &, X86RegisterBankInfo &); void initializeEvexToVexInstPassPass(PassRegistry &); Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -273,6 +273,16 @@ "fast-shld-rotate", "HasFastSHLDRotate", "true", "SHLD can be used as a faster rotate">; +// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka +// "string operations"). See "REP String Enhancement" in the Intel Software +// Development Manual. 
This feature essentially means that REP MOVSB will copy +// using the largest available size instead of copying bytes one by one, making +// it at least as fast as REPMOVS{W,D,Q}. +def FeatureERMSB + : SubtargetFeature< + "ermsb", "HasERMSB", "true", + "REP MOVS/STOS are fast">; + //===----------------------------------------------------------------------===// // X86 processors supported. //===----------------------------------------------------------------------===// @@ -498,6 +508,7 @@ FeatureAVX2, FeatureBMI, FeatureBMI2, + FeatureERMSB, FeatureFMA, FeatureLZCNT, FeatureMOVBE, Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -100,6 +100,8 @@ int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override; + int getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &SPReg, int Adjustment) const; int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const override; Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -1783,6 +1783,14 @@ return Offset + FPDelta; } +int X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, + int FI, unsigned &FrameReg, + int Adjustment) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + FrameReg = TRI->getStackRegister(); + return MFI.getObjectOffset(FI) - getOffsetOfLocalArea() + Adjustment; +} + int X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, unsigned &FrameReg, @@ -1839,9 +1847,6 @@ assert(MF.getInfo()->getTCReturnAddrDelta() >= 0 && "we don't handle this case!"); - // Fill in FrameReg output argument. - FrameReg = TRI->getStackRegister(); - // This is how the math works out: // // %rsp grows (i.e. gets lower) left to right. Each box below is @@ -1866,12 +1871,8 @@ // (C - E) == (C - A) - (B - A) + (B - E) // { Using [1], [2] and [3] above } // == getObjectOffset - LocalAreaOffset + StackSize - // - - // Get the Offset from the StackPointer - int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); - return Offset + StackSize; + return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize); } bool X86FrameLowering::assignCalleeSavedSpillSlots( Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1311,8 +1311,9 @@ ++Cost; // If the base is a register with multiple uses, this // transformation may save a mov. - if ((AM.BaseType == X86ISelAddressMode::RegBase && - AM.Base_Reg.getNode() && + // FIXME: Don't rely on DELETED_NODEs. + if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && + AM.Base_Reg->getOpcode() != ISD::DELETED_NODE && !AM.Base_Reg.getNode()->hasOneUse()) || AM.BaseType == X86ISelAddressMode::FrameIndexBase) --Cost; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -16069,7 +16069,7 @@ unsigned EltBits = EltVT.getSizeInBits(); // For FABS, mask is 0x7f...; for FNEG, mask is 0x80... APInt MaskElt = - IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits); + IsFABS ? 
APInt::getSignedMaxValue(EltBits) : APInt::getSignMask(EltBits); const fltSemantics &Sem = EltVT == MVT::f64 ? APFloat::IEEEdouble() : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle()); @@ -16132,9 +16132,9 @@ // The mask constants are automatically splatted for vector types. unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue SignMask = DAG.getConstantFP( - APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT); SDValue MagMask = DAG.getConstantFP( - APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT); + APFloat(Sem, ~APInt::getSignMask(EltSizeInBits)), dl, LogicVT); // First, clear all bits but the sign bit from the second operand (sign). if (IsFakeVector) @@ -17344,10 +17344,10 @@ // bits of the inputs before performing those operations. if (FlipSigns) { MVT EltVT = VT.getVectorElementType(); - SDValue SB = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()), dl, + SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, VT); - Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SB); - Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SB); + Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); + Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); } SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); @@ -22111,11 +22111,11 @@ } // i64 vector arithmetic shift can be emulated with the transform: - // M = lshr(SIGN_BIT, Amt) + // M = lshr(SIGN_MASK, Amt) // ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && Op.getOpcode() == ISD::SRA) { - SDValue S = DAG.getConstant(APInt::getSignBit(64), dl, VT); + SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); R = DAG.getNode(ISD::XOR, dl, VT, R, M); @@ -30152,7 +30152,7 @@ // x s< 0 ? x^C : 0 --> subus x, C if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - OpRHSConst->getAPIntValue().isSignBit()) + OpRHSConst->getAPIntValue().isSignMask()) // Note that we have to rebuild the RHS constant here to ensure we // don't rely on particular values of undef lanes. return DAG.getNode( @@ -30203,11 +30203,11 @@ return SDValue(); assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size"); - APInt DemandedMask(APInt::getSignBit(BitWidth)); + APInt DemandedMask(APInt::getSignMask(BitWidth)); APInt KnownZero, KnownOne; TargetLowering::TargetLoweringOpt TLO(DAG, DCI.isBeforeLegalize(), DCI.isBeforeLegalizeOps()); - if (TLO.ShrinkDemandedConstant(Cond, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Cond, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Cond, DemandedMask, KnownZero, KnownOne, TLO)) { // If we changed the computation somewhere in the DAG, this change will @@ -33428,8 +33428,8 @@ SDValue Op0 = peekThroughBitcasts(Op.getOperand(0)); unsigned EltBits = Op1.getScalarValueSizeInBits(); - auto isSignBitValue = [&](const ConstantFP *C) { - return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits); + auto isSignMask = [&](const ConstantFP *C) { + return C->getValueAPF().bitcastToAPInt() == APInt::getSignMask(EltBits); }; // There is more than one way to represent the same constant on @@ -33440,21 +33440,21 @@ // We check all variants here. 
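+  // For instance, a v4f32 sign mask may reach this point as
+  //   (X86ISD::VBROADCAST (f32 constant-pool load of -0.0)),
+  //   a BUILD_VECTOR whose lanes splat ConstantFP -0.0, or
+  //   a constant-pool vector (splat) or scalar ConstantFP;
+  // the checks below cover each of these forms.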
if (Op1.getOpcode() == X86ISD::VBROADCAST) { if (auto *C = getTargetConstantFromNode(Op1.getOperand(0))) - if (isSignBitValue(cast(C))) + if (isSignMask(cast(C))) return Op0; } else if (BuildVectorSDNode *BV = dyn_cast(Op1)) { if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode()) - if (isSignBitValue(CN->getConstantFPValue())) + if (isSignMask(CN->getConstantFPValue())) return Op0; } else if (auto *C = getTargetConstantFromNode(Op1)) { if (C->getType()->isVectorTy()) { if (auto *SplatV = C->getSplatValue()) - if (isSignBitValue(cast(SplatV))) + if (isSignMask(cast(SplatV))) return Op0; } else if (auto *FPConst = dyn_cast(C)) - if (isSignBitValue(FPConst)) + if (isSignMask(FPConst)) return Op0; } return SDValue(); @@ -33777,7 +33777,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Op1, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); } Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -897,6 +897,7 @@ def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">; def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">; def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">; +def HasERMSB : Predicate<"Subtarget->hasERMSB()">; def HasMFence : Predicate<"Subtarget->hasMFence()">; //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86InstructionSelector.cpp =================================================================== --- lib/Target/X86/X86InstructionSelector.cpp +++ lib/Target/X86/X86InstructionSelector.cpp @@ -39,11 +39,16 @@ namespace { +#define GET_GLOBALISEL_PREDICATE_BITSET +#include "X86GenGlobalISel.inc" +#undef GET_GLOBALISEL_PREDICATE_BITSET + class X86InstructionSelector : public InstructionSelector { public: - X86InstructionSelector(const X86Subtarget &STI, + X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, const X86RegisterBankInfo &RBI); + void beginFunction(const MachineFunction &MF) override; bool select(MachineInstr &I) const override; private: @@ -70,10 +75,17 @@ bool selectTrunc(MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const; + const X86TargetMachine &TM; const X86Subtarget &STI; const X86InstrInfo &TII; const X86RegisterInfo &TRI; const X86RegisterBankInfo &RBI; + bool OptForSize; + bool OptForMinSize; + + PredicateBitset AvailableFeatures; + PredicateBitset computeAvailableFeatures(const MachineFunction *MF, + const X86Subtarget *Subtarget) const; #define GET_GLOBALISEL_TEMPORARIES_DECL #include "X86GenGlobalISel.inc" @@ -86,10 +98,12 @@ #include "X86GenGlobalISel.inc" #undef GET_GLOBALISEL_IMPL -X86InstructionSelector::X86InstructionSelector(const X86Subtarget &STI, +X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, + const X86Subtarget &STI, const X86RegisterBankInfo &RBI) - : InstructionSelector(), STI(STI), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) + : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), + TRI(*STI.getRegisterInfo()), RBI(RBI), OptForSize(false), + OptForMinSize(false), AvailableFeatures() #define GET_GLOBALISEL_TEMPORARIES_INIT #include "X86GenGlobalISel.inc" 
#undef GET_GLOBALISEL_TEMPORARIES_INIT @@ -181,6 +195,12 @@ return true; } +void X86InstructionSelector::beginFunction(const MachineFunction &MF) { + OptForSize = MF.getFunction()->optForSize(); + OptForMinSize = MF.getFunction()->optForMinSize(); + AvailableFeatures = computeAvailableFeatures(&MF, &STI); +} + bool X86InstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -571,7 +591,8 @@ } InstructionSelector * -llvm::createX86InstructionSelector(X86Subtarget &Subtarget, +llvm::createX86InstructionSelector(const X86TargetMachine &TM, + X86Subtarget &Subtarget, X86RegisterBankInfo &RBI) { - return new X86InstructionSelector(Subtarget, RBI); + return new X86InstructionSelector(TM, Subtarget, RBI); } Index: lib/Target/X86/X86RegisterInfo.cpp =================================================================== --- lib/Target/X86/X86RegisterInfo.cpp +++ lib/Target/X86/X86RegisterInfo.cpp @@ -669,32 +669,27 @@ MachineFunction &MF = *MI.getParent()->getParent(); const X86FrameLowering *TFI = getFrameLowering(MF); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - unsigned BasePtr; - unsigned Opc = MI.getOpcode(); - bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm || - Opc == X86::TCRETURNmi || Opc == X86::TCRETURNmi64; - - if (hasBasePointer(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : getBaseRegister()); - else if (needsStackRealignment(MF)) - BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr); - else if (AfterFPPop) - BasePtr = StackPtr; - else - BasePtr = (TFI->hasFP(MF) ? FramePtr : StackPtr); + // Determine base register and offset. + int FIOffset; + unsigned BasePtr; + if (MI.isReturn()) { + assert(MF.getFrameInfo().isFixedObjectIndex(FrameIndex) && + "Should only reference fixed stack objects here"); + FIOffset = TFI->getFrameIndexReferenceSP(MF, FrameIndex, BasePtr, 0); + } else { + FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, BasePtr); + } // LOCAL_ESCAPE uses a single offset, with no register. It only works in the // simple FP case, and doesn't work with stack realignment. On 32-bit, the // offset is from the traditional base pointer location. On 64-bit, the // offset is from the SP at the end of the prologue, not the FP location. This // matches the behavior of llvm.frameaddress. - unsigned IgnoredFrameReg; + unsigned Opc = MI.getOpcode(); if (Opc == TargetOpcode::LOCAL_ESCAPE) { MachineOperand &FI = MI.getOperand(FIOperandNum); - int Offset; - Offset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - FI.ChangeToImmediate(Offset); + FI.ChangeToImmediate(FIOffset); return; } @@ -710,15 +705,6 @@ // FrameIndex with base register. Add an offset to the offset. MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false); - // Now add the frame object offset to the offset from EBP. - int FIOffset; - if (AfterFPPop) { - // Tail call jmp happens after FP is popped. 
- const MachineFrameInfo &MFI = MF.getFrameInfo(); - FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea(); - } else - FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg); - if (BasePtr == StackPtr) FIOffset += SPAdj; Index: lib/Target/X86/X86SelectionDAGInfo.cpp =================================================================== --- lib/Target/X86/X86SelectionDAGInfo.cpp +++ lib/Target/X86/X86SelectionDAGInfo.cpp @@ -106,7 +106,6 @@ SDValue Count; ConstantSDNode *ValC = dyn_cast(Src); unsigned BytesLeft = 0; - bool TwoRepStos = false; if (ValC) { unsigned ValReg; uint64_t Val = ValC->getZExtValue() & 255; @@ -163,20 +162,7 @@ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag }; Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - if (TwoRepStos) { - InFlag = Chain.getValue(1); - Count = Size; - EVT CVT = Count.getValueType(); - SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, - DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, - CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : X86::ECX, - Left, InFlag); - InFlag = Chain.getValue(1); - Tys = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; - Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops); - } else if (BytesLeft) { + if (BytesLeft) { // Handle the last 1 - 7 bytes. unsigned Offset = SizeVal - BytesLeft; EVT AddrVT = Dst.getValueType(); @@ -195,6 +181,24 @@ return Chain; } +namespace { + +// Represents a cover of a buffer of SizeVal bytes with blocks of size +// AVT, as well as how many bytes remain (BytesLeft is always smaller than +// the block size). +struct RepMovsRepeats { + RepMovsRepeats(const uint64_t SizeVal, const MVT& AVT) { + const unsigned UBytes = AVT.getSizeInBits() / 8; + Count = SizeVal / UBytes; + BytesLeft = SizeVal % UBytes; + } + + unsigned Count; + unsigned BytesLeft; +}; + +} // namespace + SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline, @@ -229,7 +233,12 @@ return SDValue(); MVT AVT; - if (Align & 1) + if (Subtarget.hasERMSB()) + // If the target has enhanced REPMOVSB, then it's at least as fast to use + // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle + // BytesLeft. + AVT = MVT::i8; + else if (Align & 1) AVT = MVT::i8; else if (Align & 2) AVT = MVT::i16; @@ -240,14 +249,18 @@ // QWORD aligned AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; - unsigned UBytes = AVT.getSizeInBits() / 8; - unsigned CountVal = SizeVal / UBytes; - SDValue Count = DAG.getIntPtrConstant(CountVal, dl); - unsigned BytesLeft = SizeVal % UBytes; + RepMovsRepeats Repeats(SizeVal, AVT); + if (Repeats.BytesLeft > 0 && + DAG.getMachineFunction().getFunction()->optForMinSize()) { + // When agressively optimizing for size, avoid generating the code to handle + // BytesLeft. + AVT = MVT::i8; + Repeats = RepMovsRepeats(SizeVal, AVT); + } SDValue InFlag; Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, - Count, InFlag); + DAG.getIntPtrConstant(Repeats.Count, dl), InFlag); InFlag = Chain.getValue(1); Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); @@ -262,9 +275,9 @@ SmallVector Results; Results.push_back(RepMovs); - if (BytesLeft) { + if (Repeats.BytesLeft) { // Handle the last 1 - 7 bytes. 
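+    // e.g. a 23-byte copy with AVT = i32 yields Repeats.Count = 5 and
+    // Repeats.BytesLeft = 3; the three trailing bytes are copied by the
+    // extra memcpy emitted below.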
- unsigned Offset = SizeVal - BytesLeft; + unsigned Offset = SizeVal - Repeats.BytesLeft; EVT DstVT = Dst.getValueType(); EVT SrcVT = Src.getValueType(); EVT SizeVT = Size.getValueType(); @@ -275,7 +288,8 @@ DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)), - DAG.getConstant(BytesLeft, dl, SizeVT), + DAG.getConstant(Repeats.BytesLeft, dl, + SizeVT), Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -232,6 +232,9 @@ /// True if SHLD based rotate is fast. bool HasFastSHLDRotate; + /// True if the processor has enhanced REP MOVSB/STOSB. + bool HasERMSB; + /// True if the short functions should be padded to prevent /// a stall when returning too early. bool PadShortFunctions; @@ -472,6 +475,7 @@ bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; } bool hasFastLZCNT() const { return HasFastLZCNT; } bool hasFastSHLDRotate() const { return HasFastSHLDRotate; } + bool hasERMSB() const { return HasERMSB; } bool hasSlowDivide32() const { return HasSlowDivide32; } bool hasSlowDivide64() const { return HasSlowDivide64; } bool padShortFunctions() const { return PadShortFunctions; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -303,6 +303,7 @@ HasFastVectorFSQRT = false; HasFastLZCNT = false; HasFastSHLDRotate = false; + HasERMSB = false; HasSlowDivide32 = false; HasSlowDivide64 = false; PadShortFunctions = false; Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -286,7 +286,7 @@ auto *RBI = new X86RegisterBankInfo(*I->getRegisterInfo()); GISel->RegBankInfo.reset(RBI); - GISel->InstSelector.reset(createX86InstructionSelector(*I, *RBI)); + GISel->InstSelector.reset(createX86InstructionSelector(*this, *I, *RBI)); #endif I->setGISelAccessor(*GISel); } Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -1605,7 +1605,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(OutVal, DemandedMask) || + if (TLI.ShrinkDemandedConstant(OutVal, DemandedMask, TLO) || TLI.SimplifyDemandedBits(OutVal, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); @@ -1622,7 +1622,7 @@ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLO.ShrinkDemandedConstant(Time, DemandedMask) || + if (TLI.ShrinkDemandedConstant(Time, DemandedMask, TLO) || TLI.SimplifyDemandedBits(Time, DemandedMask, KnownZero, KnownOne, TLO)) DCI.CommitTargetLoweringOpt(TLO); Index: lib/Transforms/InstCombine/InstCombineAddSub.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -1044,14 +1044,14 @@ const APInt *RHSC; if (match(RHS, m_APInt(RHSC))) { - if (RHSC->isSignBit()) { + 
if (RHSC->isSignMask()) { // If wrapping is not allowed, then the addition must set the sign bit: - // X + (signbit) --> X | signbit + // X + (signmask) --> X | signmask if (I.hasNoSignedWrap() || I.hasNoUnsignedWrap()) return BinaryOperator::CreateOr(LHS, RHS); // If wrapping is allowed, then the addition flips the sign bit of LHS: - // X + (signbit) --> X ^ signbit + // X + (signmask) --> X ^ signmask return BinaryOperator::CreateXor(LHS, RHS); } @@ -1120,9 +1120,9 @@ return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); } - // (X + signbit) + C could have gotten canonicalized to (X ^ signbit) + C, - // transform them into (X + (signbit ^ C)) - if (XorRHS->getValue().isSignBit()) + // (X + signmask) + C could have gotten canonicalized to (X^signmask) + C, + // transform them into (X + (signmask ^ C)) + if (XorRHS->getValue().isSignMask()) return BinaryOperator::CreateAdd(XorLHS, ConstantExpr::getXor(XorRHS, CI)); } @@ -1385,39 +1385,55 @@ // integer add followed by a promotion. if (SIToFPInst *LHSConv = dyn_cast(LHS)) { Value *LHSIntVal = LHSConv->getOperand(0); + Type *FPType = LHSConv->getType(); + + // TODO: This check is overly conservative. In many cases known bits + // analysis can tell us that the result of the addition has less significant + // bits than the integer type can hold. + auto IsValidPromotion = [](Type *FTy, Type *ITy) { + // Do we have enough bits in the significand to represent the result of + // the integer addition? + unsigned MaxRepresentableBits = + APFloat::semanticsPrecision(FTy->getFltSemantics()); + return ITy->getIntegerBitWidth() <= MaxRepresentableBits; + }; // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) // ... if the constant fits in the integer value. This is useful for things // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer // requires a constant pool load, and generally allows the add to be better // instcombined. - if (ConstantFP *CFP = dyn_cast(RHS)) { - Constant *CI = - ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); - if (LHSConv->hasOneUse() && - ConstantExpr::getSIToFP(CI, I.getType()) == CFP && - WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { - // Insert the new integer add. - Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - CI, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + if (ConstantFP *CFP = dyn_cast(RHS)) + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + Constant *CI = + ConstantExpr::getFPToSI(CFP, LHSIntVal->getType()); + if (LHSConv->hasOneUse() && + ConstantExpr::getSIToFP(CI, I.getType()) == CFP && + WillNotOverflowSignedAdd(LHSIntVal, CI, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + CI, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } - } // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) if (SIToFPInst *RHSConv = dyn_cast(RHS)) { Value *RHSIntVal = RHSConv->getOperand(0); - - // Only do this if x/y have the same type, if at least one of them has a - // single use (so we don't increase the number of int->fp conversions), - // and if the integer add will not overflow. - if (LHSIntVal->getType() == RHSIntVal->getType() && - (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && - WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { - // Insert the new integer add. 
- Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, - RHSIntVal, "addconv"); - return new SIToFPInst(NewAdd, I.getType()); + // It's enough to check LHS types only because we require int types to + // be the same for this transform. + if (IsValidPromotion(FPType, LHSIntVal->getType())) { + // Only do this if x/y have the same type, if at least one of them has a + // single use (so we don't increase the number of int->fp conversions), + // and if the integer add will not overflow. + if (LHSIntVal->getType() == RHSIntVal->getType() && + (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && + WillNotOverflowSignedAdd(LHSIntVal, RHSIntVal, I)) { + // Insert the new integer add. + Value *NewAdd = Builder->CreateNSWAdd(LHSIntVal, + RHSIntVal, "addconv"); + return new SIToFPInst(NewAdd, I.getType()); + } } } } Index: lib/Transforms/InstCombine/InstCombineAndOrXor.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2480,8 +2480,8 @@ Constant *NegOp0CI = ConstantExpr::getNeg(Op0CI); return BinaryOperator::CreateSub(SubOne(NegOp0CI), Op0I->getOperand(0)); - } else if (RHSC->getValue().isSignBit()) { - // (X + C) ^ signbit -> (X + C + signbit) + } else if (RHSC->getValue().isSignMask()) { + // (X + C) ^ signmask -> (X + C + signmask) Constant *C = Builder->getInt(RHSC->getValue() + Op0CI->getValue()); return BinaryOperator::CreateAdd(Op0I->getOperand(0), C); Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -140,7 +140,7 @@ case ICmpInst::ICMP_UGE: // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc) TrueIfSigned = true; - return RHS.isSignBit(); + return RHS.isSignMask(); default: return false; } @@ -1532,14 +1532,14 @@ } if (Xor->hasOneUse()) { - // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit)) - if (!Cmp.isEquality() && XorC->isSignBit()) { + // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask)) + if (!Cmp.isEquality() && XorC->isSignMask()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC)); } - // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit)) + // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask)) if (!Cmp.isEquality() && XorC->isMaxSignedValue()) { Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate() : Cmp.getSignedPredicate(); @@ -2402,9 +2402,9 @@ const APInt &Upper = CR.getUpper(); const APInt &Lower = CR.getLower(); if (Cmp.isSigned()) { - if (Lower.isSignBit()) + if (Lower.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper)); - if (Upper.isSignBit()) + if (Upper.isSignMask()) return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower)); } else { if (Lower.isMinValue()) @@ -2604,7 +2604,7 @@ break; // Replace (and X, (1 << size(X)-1) != 0) with x s< 0 - if (BOC->isSignBit()) { + if (BOC->isSignMask()) { Constant *Zero = Constant::getNullValue(BOp0->getType()); auto NewPred = isICMP_NE ? 
ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE; return new ICmpInst(NewPred, BOp0, Zero); @@ -3032,9 +3032,9 @@ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b return new ICmpInst(I.getPredicate(), BO0->getOperand(0), BO1->getOperand(0)); - // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - if (CI->getValue().isSignBit()) { + if (CI->getValue().isSignMask()) { ICmpInst::Predicate Pred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); @@ -3797,7 +3797,7 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth, bool isSignCheck) { if (isSignCheck) - return APInt::getSignBit(BitWidth); + return APInt::getSignMask(BitWidth); ConstantInt *CI = dyn_cast(I.getOperand(1)); if (!CI) return APInt::getAllOnesValue(BitWidth); Index: lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- lib/Transforms/InstCombine/InstCombineInternal.h +++ lib/Transforms/InstCombine/InstCombineInternal.h @@ -551,9 +551,10 @@ unsigned Depth, Instruction *CxtI); /// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded /// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence. - Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl, - const APInt &DemandedMask, APInt &KnownZero, - APInt &KnownOne); + Value *simplifyShrShlDemandedBits( + Instruction *Shr, const APInt &ShrOp1, Instruction *Shl, + const APInt &ShlOp1, const APInt &DemandedMask, APInt &KnownZero, + APInt &KnownOne); /// \brief Tries to simplify operands to an integer instruction based on its /// demanded bits. Index: lib/Transforms/InstCombine/InstCombineMulDivRem.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1237,7 +1237,7 @@ // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a udiv. - APInt Mask(APInt::getSignBit(I.getType()->getScalarSizeInBits())); + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); if (MaskedValueIsZero(Op0, Mask, 0, &I)) { if (MaskedValueIsZero(Op1, Mask, 0, &I)) { // X sdiv Y -> X udiv Y, iff X and Y don't have sign bit set @@ -1543,7 +1543,7 @@ // If the sign bits of both operands are zero (i.e. we can prove they are // unsigned inputs), turn this into a urem. 
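+  // e.g. given %a = and i32 %x, 255 and %b = and i32 %y, 15, both sign bits
+  // are known zero, so %r = srem i32 %a, %b can be rewritten as urem i32 %a, %b.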
- APInt Mask(APInt::getSignBit(I.getType()->getScalarSizeInBits())); + APInt Mask(APInt::getSignMask(I.getType()->getScalarSizeInBits())); if (MaskedValueIsZero(Op1, Mask, 0, &I) && MaskedValueIsZero(Op0, Mask, 0, &I)) { // X srem Y -> X urem Y, iff X and Y don't have sign bit set Index: lib/Transforms/InstCombine/InstCombineSelect.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSelect.cpp +++ lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -618,7 +618,7 @@ { unsigned BitWidth = DL.getTypeSizeInBits(TrueVal->getType()->getScalarType()); - APInt MinSignedValue = APInt::getSignBit(BitWidth); + APInt MinSignedValue = APInt::getSignedMinValue(BitWidth); Value *X; const APInt *Y, *C; bool TrueWhenUnset; Index: lib/Transforms/InstCombine/InstCombineShifts.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineShifts.cpp +++ lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -760,7 +760,7 @@ } // See if we can turn a signed shr into an unsigned shr. - if (MaskedValueIsZero(Op0, APInt::getSignBit(BitWidth), 0, &I)) + if (MaskedValueIsZero(Op0, APInt::getSignMask(BitWidth), 0, &I)) return BinaryOperator::CreateLShr(Op0, Op1); return nullptr; Index: lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -26,7 +26,7 @@ /// constant integer. If so, check to see if there are any bits set in the /// constant that are not demanded. If so, shrink the constant and return true. static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo, - APInt Demanded) { + const APInt &Demanded) { assert(I && "No instruction?"); assert(OpNo < I->getNumOperands() && "Operand index too large"); @@ -37,13 +37,11 @@ return false; // If there are no bits set that aren't demanded, nothing to do. - Demanded = Demanded.zextOrTrunc(C->getBitWidth()); - if ((~Demanded & *C) == 0) + if (C->isSubsetOf(Demanded)) return false; // This instruction is producing bits that are not demanded. Shrink the RHS. - Demanded &= *C; - I->setOperand(OpNo, ConstantInt::get(Op->getType(), Demanded)); + I->setOperand(OpNo, ConstantInt::get(Op->getType(), *C & Demanded)); return true; } @@ -117,27 +115,16 @@ KnownOne.getBitWidth() == BitWidth && "Value *V, DemandedMask, KnownZero and KnownOne " "must have same BitWidth"); - const APInt *C; - if (match(V, m_APInt(C))) { - // We know all of the bits for a scalar constant or a splat vector constant! - KnownOne = *C; - KnownZero = ~KnownOne; - return nullptr; - } - if (isa(V)) { - // We know all of the bits for a constant! - KnownOne.clearAllBits(); - KnownZero.setAllBits(); + + if (isa(V)) { + computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); return nullptr; } KnownZero.clearAllBits(); KnownOne.clearAllBits(); - if (DemandedMask == 0) { // Not demanding any bits from V. - if (isa(V)) - return nullptr; + if (DemandedMask == 0) // Not demanding any bits from V. return UndefValue::get(VTy); - } if (Depth == 6) // Limit search depth. return nullptr; @@ -187,16 +174,14 @@ // If the client is only demanding bits that we know, return the known // constant. 
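+    // DemandedMask.isSubsetOf(IKnownZero|IKnownOne) is
+    // (DemandedMask & ~(IKnownZero|IKnownOne)) == 0, i.e. every demanded bit
+    // is already known to be 0 or 1, so the value can be replaced by the
+    // constant built from IKnownOne.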
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and'. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -224,25 +209,14 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) - return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); // If the RHS is a constant, see if we can simplify it. @@ -271,20 +245,20 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(VTy, IKnownOne); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // If all of the demanded bits are known to be zero on one side or the // other, turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 - if ((DemandedMask & ~RHSKnownZero & ~LHSKnownZero) == 0) { + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownZero)) { Instruction *Or = BinaryOperator::CreateOr(I->getOperand(0), I->getOperand(1), I->getName()); @@ -295,20 +269,28 @@ // bits on that side are also known to be set on the other side, turn this // into an AND, as we know the bits will be cleared. // e.g. 
(X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 - if ((DemandedMask & (RHSKnownZero|RHSKnownOne)) == DemandedMask) { - // all known - if ((RHSKnownOne & LHSKnownOne) == RHSKnownOne) { - Constant *AndC = Constant::getIntegerValue(VTy, - ~RHSKnownOne & DemandedMask); - Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); - return InsertNewInstWith(And, *I); - } + if (DemandedMask.isSubsetOf(RHSKnownZero|RHSKnownOne) && + RHSKnownOne.isSubsetOf(LHSKnownOne)) { + Constant *AndC = Constant::getIntegerValue(VTy, + ~RHSKnownOne & DemandedMask); + Instruction *And = BinaryOperator::CreateAnd(I->getOperand(0), AndC); + return InsertNewInstWith(And, *I); } - // If the RHS is a constant, see if we can simplify it. - // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. - if (ShrinkDemandedConstant(I, 1, DemandedMask)) - return I; + // If the RHS is a constant, see if we can change it. Don't alter a -1 + // constant because that's a canonical 'not' op, and that is better for + // combining, SCEV, and codegen. + const APInt *C; + if (match(I->getOperand(1), m_APInt(C)) && !C->isAllOnesValue()) { + if ((*C | ~DemandedMask).isAllOnesValue()) { + // Force bits to 1 to create a 'not' op. + I->setOperand(1, ConstantInt::getAllOnesValue(VTy)); + return I; + } + // If we can't turn this into a 'not', try to shrink the constant. + if (ShrinkDemandedConstant(I, 1, DemandedMask)) + return I; + } // If our LHS is an 'and' and if it has one use, and if any of the bits we // are flipping are known to be set, then the xor is just resetting those @@ -495,17 +477,15 @@ computeKnownBits(V, KnownZero, KnownOne, Depth, CxtI); break; } - case Instruction::Shl: - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { - { - Value *VarX; ConstantInt *C1; - if (match(I->getOperand(0), m_Shr(m_Value(VarX), m_ConstantInt(C1)))) { - Instruction *Shr = cast(I->getOperand(0)); - Value *R = SimplifyShrShlDemandedBits(Shr, I, DemandedMask, - KnownZero, KnownOne); - if (R) - return R; - } + case Instruction::Shl: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { + const APInt *ShrAmt; + if (match(I->getOperand(0), m_Shr(m_Value(), m_APInt(ShrAmt)))) { + Instruction *Shr = cast(I->getOperand(0)); + if (Value *R = simplifyShrShlDemandedBits( + Shr, *ShrAmt, I, *SA, DemandedMask, KnownZero, KnownOne)) + return R; } uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -529,9 +509,10 @@ KnownZero.setLowBits(ShiftAmt); } break; - case Instruction::LShr: - // For a logical shift right - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + } + case Instruction::LShr: { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Unsigned shift right. @@ -552,7 +533,8 @@ KnownZero.setHighBits(ShiftAmt); // high bits known zero. } break; - case Instruction::AShr: + } + case Instruction::AShr: { // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless @@ -566,15 +548,16 @@ // If the sign bit is the only bit demanded by this ashr, then there is no // need to do it, the shift doesn't change the high bit. 
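+  // e.g. if only bit 31 of (ashr i32 %x, 7) is demanded: an arithmetic shift
+  // replicates the sign bit, so bit 31 of the result equals bit 31 of %x and
+  // %x can be returned unchanged.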
- if (DemandedMask.isSignBit()) + if (DemandedMask.isSignMask()) return I->getOperand(0); - if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + const APInt *SA; + if (match(I->getOperand(1), m_APInt(SA))) { uint32_t ShiftAmt = SA->getLimitedValue(BitWidth-1); // Signed shift right. APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt)); - // If any of the "high bits" are demanded, we should set the sign bit as + // If any of the high bits are demanded, we should set the sign bit as // demanded. if (DemandedMask.countLeadingZeros() <= ShiftAmt) DemandedMaskIn.setSignBit(); @@ -587,6 +570,7 @@ if (SimplifyDemandedBits(I, 0, DemandedMaskIn, KnownZero, KnownOne, Depth + 1)) return I; + assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); @@ -594,24 +578,24 @@ KnownOne.lshrInPlace(ShiftAmt); // Handle the sign bits. - APInt SignBit(APInt::getSignBit(BitWidth)); + APInt SignMask(APInt::getSignMask(BitWidth)); // Adjust to where it is now in the mask. - SignBit.lshrInPlace(ShiftAmt); + SignMask.lshrInPlace(ShiftAmt); // If the input sign bit is known to be zero, or if none of the top bits // are demanded, turn this into an unsigned shift right. if (BitWidth <= ShiftAmt || KnownZero[BitWidth-ShiftAmt-1] || - (HighBits & ~DemandedMask) == HighBits) { - // Perform the logical shift right. - BinaryOperator *NewVal = BinaryOperator::CreateLShr(I->getOperand(0), - SA, I->getName()); - NewVal->setIsExact(cast(I)->isExact()); - return InsertNewInstWith(NewVal, *I); - } else if ((KnownOne & SignBit) != 0) { // New bits are known one. + !DemandedMask.intersects(HighBits)) { + BinaryOperator *LShr = BinaryOperator::CreateLShr(I->getOperand(0), + I->getOperand(1)); + LShr->setIsExact(cast(I)->isExact()); + return InsertNewInstWith(LShr, *I); + } else if (KnownOne.intersects(SignMask)) { // New bits are known one. KnownOne |= HighBits; } } break; + } case Instruction::SRem: if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { // X % -1 demands all the bits because we don't want to introduce @@ -624,7 +608,7 @@ return I->getOperand(0); APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + APInt Mask2 = LowBits | APInt::getSignMask(BitWidth); if (SimplifyDemandedBits(I, 0, Mask2, LHSKnownZero, LHSKnownOne, Depth + 1)) return I; @@ -635,12 +619,12 @@ // If LHS is non-negative or has all low bits zero, then the upper bits // are all zero. - if (LHSKnownZero.isSignBitSet() || ((LHSKnownZero & LowBits) == LowBits)) + if (LHSKnownZero.isSignBitSet() || LowBits.isSubsetOf(LHSKnownZero)) KnownZero |= ~LowBits; // If LHS is negative and not all low bits are zero, then the upper bits // are all one. - if (LHSKnownOne.isSignBitSet() && ((LHSKnownOne & LowBits) != 0)) + if (LHSKnownOne.isSignBitSet() && LowBits.intersects(LHSKnownOne)) KnownOne |= ~LowBits; assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); @@ -744,7 +728,7 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(VTy, KnownOne); return nullptr; } @@ -783,17 +767,15 @@ // If the client is only demanding bits that we know, return the known // constant. 
- if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known 1 on one side, return the other. // These bits cannot contribute to the result of the 'and' in this // context. - if ((DemandedMask & ~LHSKnownZero & RHSKnownOne) == - (DemandedMask & ~LHSKnownZero)) + if (DemandedMask.isSubsetOf(LHSKnownZero | RHSKnownOne)) return I->getOperand(0); - if ((DemandedMask & ~RHSKnownZero & LHSKnownOne) == - (DemandedMask & ~RHSKnownZero)) + if (DemandedMask.isSubsetOf(RHSKnownZero | LHSKnownOne)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -817,26 +799,15 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. These bits cannot contribute to the result of the 'or' in this // context. - if ((DemandedMask & ~LHSKnownOne & RHSKnownZero) == - (DemandedMask & ~LHSKnownOne)) - return I->getOperand(0); - if ((DemandedMask & ~RHSKnownOne & LHSKnownZero) == - (DemandedMask & ~RHSKnownOne)) - return I->getOperand(1); - - // If all of the potentially set bits on one side are known to be set on - // the other side, just use the 'other' side. - if ((DemandedMask & (~RHSKnownZero) & LHSKnownOne) == - (DemandedMask & (~RHSKnownZero))) + if (DemandedMask.isSubsetOf(LHSKnownOne | RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & (~LHSKnownZero) & RHSKnownOne) == - (DemandedMask & (~LHSKnownZero))) + if (DemandedMask.isSubsetOf(RHSKnownOne | LHSKnownZero)) return I->getOperand(1); KnownZero = std::move(IKnownZero); @@ -861,14 +832,14 @@ // If the client is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (IKnownZero|IKnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(IKnownZero|IKnownOne)) return Constant::getIntegerValue(ITy, IKnownOne); // If all of the demanded bits are known zero on one side, return the // other. - if ((DemandedMask & RHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(RHSKnownZero)) return I->getOperand(0); - if ((DemandedMask & LHSKnownZero) == DemandedMask) + if (DemandedMask.isSubsetOf(LHSKnownZero)) return I->getOperand(1); // Output known-0 bits are known if clear or set in both the LHS & RHS. @@ -883,7 +854,7 @@ // If this user is only demanding bits that we know, return the known // constant. - if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) + if (DemandedMask.isSubsetOf(KnownZero|KnownOne)) return Constant::getIntegerValue(ITy, KnownOne); break; @@ -910,20 +881,17 @@ /// /// As with SimplifyDemandedUseBits, it returns NULL if the simplification was /// not successful. -Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr, - Instruction *Shl, - const APInt &DemandedMask, - APInt &KnownZero, - APInt &KnownOne) { - - const APInt &ShlOp1 = cast(Shl->getOperand(1))->getValue(); - const APInt &ShrOp1 = cast(Shr->getOperand(1))->getValue(); +Value * +InstCombiner::simplifyShrShlDemandedBits(Instruction *Shr, const APInt &ShrOp1, + Instruction *Shl, const APInt &ShlOp1, + const APInt &DemandedMask, + APInt &KnownZero, APInt &KnownOne) { if (!ShlOp1 || !ShrOp1) - return nullptr; // Noop. + return nullptr; // No-op. 
Value *VarX = Shr->getOperand(0); Type *Ty = VarX->getType(); - unsigned BitWidth = Ty->getIntegerBitWidth(); + unsigned BitWidth = Ty->getScalarSizeInBits(); if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth)) return nullptr; // Undef. Index: lib/Transforms/Scalar/ConstantHoisting.cpp =================================================================== --- lib/Transforms/Scalar/ConstantHoisting.cpp +++ lib/Transforms/Scalar/ConstantHoisting.cpp @@ -53,6 +53,12 @@ STATISTIC(NumConstantsHoisted, "Number of constants hoisted"); STATISTIC(NumConstantsRebased, "Number of constants rebased"); +static cl::opt ConstHoistWithBlockFrequency( + "consthoist-with-block-frequency", cl::init(false), cl::Hidden, + cl::desc("Enable the use of the block frequency analysis to reduce the " + "chance to execute const materialization more frequently than " + "without hoisting.")); + namespace { /// \brief The constant hoisting pass. class ConstantHoistingLegacyPass : public FunctionPass { @@ -68,6 +74,8 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + if (ConstHoistWithBlockFrequency) + AU.addRequired(); AU.addRequired(); AU.addRequired(); } @@ -82,6 +90,7 @@ char ConstantHoistingLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(ConstantHoistingLegacyPass, "consthoist", "Constant Hoisting", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(ConstantHoistingLegacyPass, "consthoist", @@ -99,9 +108,13 @@ DEBUG(dbgs() << "********** Begin Constant Hoisting **********\n"); DEBUG(dbgs() << "********** Function: " << Fn.getName() << '\n'); - bool MadeChange = Impl.runImpl( - Fn, getAnalysis().getTTI(Fn), - getAnalysis().getDomTree(), Fn.getEntryBlock()); + bool MadeChange = + Impl.runImpl(Fn, getAnalysis().getTTI(Fn), + getAnalysis().getDomTree(), + ConstHoistWithBlockFrequency + ? &getAnalysis().getBFI() + : nullptr, + Fn.getEntryBlock()); if (MadeChange) { DEBUG(dbgs() << "********** Function after Constant Hoisting: " @@ -148,33 +161,142 @@ return IDom->getBlock()->getTerminator(); } +/// \brief Given \p BBs as input, find another set of BBs which collectively +/// dominates \p BBs and have the minimal sum of frequencies. Return the BB +/// set found in \p BBs. +void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet &BBs) { + assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); + // Nodes on the current path to the root. + SmallPtrSet Path; + // Candidates includes any block 'BB' in set 'BBs' that is not strictly + // dominated by any other blocks in set 'BBs', and all nodes in the path + // in the dominator tree from Entry to 'BB'. + SmallPtrSet Candidates; + for (auto BB : BBs) { + Path.clear(); + // Walk up the dominator tree until Entry or another BB in BBs + // is reached. Insert the nodes on the way to the Path. + BasicBlock *Node = BB; + // The "Path" is a candidate path to be added into Candidates set. + bool isCandidate = false; + do { + Path.insert(Node); + if (Node == Entry || Candidates.count(Node)) { + isCandidate = true; + break; + } + assert(DT.getNode(Node)->getIDom() && + "Entry doens't dominate current Node"); + Node = DT.getNode(Node)->getIDom()->getBlock(); + } while (!BBs.count(Node)); + + // If isCandidate is false, Node is another Block in BBs dominating + // current 'BB'. Drop the nodes on the Path. 
+ if (!isCandidate) + continue; + + // Add nodes on the Path into Candidates. + Candidates.insert(Path.begin(), Path.end()); + } + + // Sort the nodes in Candidates in top-down order and save the nodes + // in Orders. + unsigned Idx = 0; + SmallVector Orders; + Orders.push_back(Entry); + while (Idx != Orders.size()) { + BasicBlock *Node = Orders[Idx++]; + for (auto ChildDomNode : DT.getNode(Node)->getChildren()) { + if (Candidates.count(ChildDomNode->getBlock())) + Orders.push_back(ChildDomNode->getBlock()); + } + } + + // Visit Orders in bottom-up order. + typedef std::pair, BlockFrequency> + InsertPtsCostPair; + // InsertPtsMap is a map from a BB to the best insertion points for the + // subtree of BB (subtree not including the BB itself). + DenseMap InsertPtsMap; + InsertPtsMap.reserve(Orders.size() + 1); + for (auto RIt = Orders.rbegin(); RIt != Orders.rend(); RIt++) { + BasicBlock *Node = *RIt; + bool NodeInBBs = BBs.count(Node); + SmallPtrSet &InsertPts = InsertPtsMap[Node].first; + BlockFrequency &InsertPtsFreq = InsertPtsMap[Node].second; + + // Return the optimal insert points in BBs. + if (Node == Entry) { + BBs.clear(); + if (InsertPtsFreq > BFI.getBlockFreq(Node)) + BBs.insert(Entry); + else + BBs.insert(InsertPts.begin(), InsertPts.end()); + break; + } + + BasicBlock *Parent = DT.getNode(Node)->getIDom()->getBlock(); + // Initially, ParentInsertPts is empty and ParentPtsFreq is 0. Every child + // will update its parent's ParentInsertPts and ParentPtsFreq. + SmallPtrSet &ParentInsertPts = InsertPtsMap[Parent].first; + BlockFrequency &ParentPtsFreq = InsertPtsMap[Parent].second; + // Choose to insert in Node or in subtree of Node. + if (InsertPtsFreq > BFI.getBlockFreq(Node) || NodeInBBs) { + ParentInsertPts.insert(Node); + ParentPtsFreq += BFI.getBlockFreq(Node); + } else { + ParentInsertPts.insert(InsertPts.begin(), InsertPts.end()); + ParentPtsFreq += InsertPtsFreq; + } + } +} + /// \brief Find an insertion point that dominates all uses. -Instruction *ConstantHoistingPass::findConstantInsertionPoint( +SmallPtrSet ConstantHoistingPass::findConstantInsertionPoint( const ConstantInfo &ConstInfo) const { assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry."); // Collect all basic blocks. SmallPtrSet BBs; + SmallPtrSet InsertPts; for (auto const &RCI : ConstInfo.RebasedConstants) for (auto const &U : RCI.Uses) BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent()); - if (BBs.count(Entry)) - return &Entry->front(); + if (BBs.count(Entry)) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } + + if (BFI) { + findBestInsertionSet(*DT, *BFI, Entry, BBs); + for (auto BB : BBs) { + BasicBlock::iterator InsertPt = BB->begin(); + for (; isa(InsertPt) || InsertPt->isEHPad(); ++InsertPt) + ; + InsertPts.insert(&*InsertPt); + } + return InsertPts; + } while (BBs.size() >= 2) { BasicBlock *BB, *BB1, *BB2; BB1 = *BBs.begin(); BB2 = *std::next(BBs.begin()); BB = DT->findNearestCommonDominator(BB1, BB2); - if (BB == Entry) - return &Entry->front(); + if (BB == Entry) { + InsertPts.insert(&Entry->front()); + return InsertPts; + } BBs.erase(BB1); BBs.erase(BB2); BBs.insert(BB); } assert((BBs.size() == 1) && "Expected only one element."); Instruction &FirstInst = (*BBs.begin())->front(); - return findMatInsertPt(&FirstInst); + InsertPts.insert(findMatInsertPt(&FirstInst)); + return InsertPts; } @@ -557,29 +679,54 @@ bool MadeChange = false; for (auto const &ConstInfo : ConstantVec) { // Hoist and hide the base constant behind a bitcast. 
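+    // With block-frequency-guided hoisting, findConstantInsertionPoint may
+    // return several blocks that together dominate all uses; a separate
+    // bitcast of the base constant is materialized at each of them, and every
+    // use is rebased against the copy that dominates it.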
- Instruction *IP = findConstantInsertionPoint(ConstInfo); - IntegerType *Ty = ConstInfo.BaseConstant->getType(); - Instruction *Base = - new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); - DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant << ") to BB " - << IP->getParent()->getName() << '\n' << *Base << '\n'); - NumConstantsHoisted++; + SmallPtrSet IPSet = findConstantInsertionPoint(ConstInfo); + assert(!IPSet.empty() && "IPSet is empty"); + + unsigned UsesNum = 0; + unsigned ReBasesNum = 0; + for (Instruction *IP : IPSet) { + IntegerType *Ty = ConstInfo.BaseConstant->getType(); + Instruction *Base = + new BitCastInst(ConstInfo.BaseConstant, Ty, "const", IP); + DEBUG(dbgs() << "Hoist constant (" << *ConstInfo.BaseConstant + << ") to BB " << IP->getParent()->getName() << '\n' + << *Base << '\n'); + + // Emit materialization code for all rebased constants. + unsigned Uses = 0; + for (auto const &RCI : ConstInfo.RebasedConstants) { + for (auto const &U : RCI.Uses) { + Uses++; + BasicBlock *OrigMatInsertBB = + findMatInsertPt(U.Inst, U.OpndIdx)->getParent(); + // If Base constant is to be inserted in multiple places, + // generate rebase for U using the Base dominating U. + if (IPSet.size() == 1 || + DT->dominates(Base->getParent(), OrigMatInsertBB)) { + emitBaseConstants(Base, RCI.Offset, U); + ReBasesNum++; + } + } + } + UsesNum = Uses; - // Emit materialization code for all rebased constants. - for (auto const &RCI : ConstInfo.RebasedConstants) { - NumConstantsRebased++; - for (auto const &U : RCI.Uses) - emitBaseConstants(Base, RCI.Offset, U); + // Use the same debug location as the last user of the constant. + assert(!Base->use_empty() && "The use list is empty!?"); + assert(isa(Base->user_back()) && + "All uses should be instructions."); + Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); } + (void)UsesNum; + (void)ReBasesNum; + // Expect all uses are rebased after rebase is done. + assert(UsesNum == ReBasesNum && "Not all uses are rebased"); + + NumConstantsHoisted++; - // Use the same debug location as the last user of the constant. - assert(!Base->use_empty() && "The use list is empty!?"); - assert(isa(Base->user_back()) && - "All uses should be instructions."); - Base->setDebugLoc(cast(Base->user_back())->getDebugLoc()); + // Base constant is also included in ConstInfo.RebasedConstants, so + // deduct 1 from ConstInfo.RebasedConstants.size(). + NumConstantsRebased = ConstInfo.RebasedConstants.size() - 1; - // Correct for base constant, which we counted above too. - NumConstantsRebased--; MadeChange = true; } return MadeChange; @@ -595,9 +742,11 @@ /// \brief Optimize expensive integer constants in the given function. bool ConstantHoistingPass::runImpl(Function &Fn, TargetTransformInfo &TTI, - DominatorTree &DT, BasicBlock &Entry) { + DominatorTree &DT, BlockFrequencyInfo *BFI, + BasicBlock &Entry) { this->TTI = &TTI; this->DT = &DT; + this->BFI = BFI; this->Entry = &Entry; // Collect all constant candidates. collectConstantCandidates(Fn); @@ -628,7 +777,10 @@ FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &TTI = AM.getResult(F); - if (!runImpl(F, TTI, DT, F.getEntryBlock())) + auto BFI = ConstHoistWithBlockFrequency + ? 
&AM.getResult(F) + : nullptr; + if (!runImpl(F, TTI, DT, BFI, F.getEntryBlock())) return PreservedAnalyses::all(); PreservedAnalyses PA; Index: lib/Transforms/Utils/CmpInstAnalysis.cpp =================================================================== --- lib/Transforms/Utils/CmpInstAnalysis.cpp +++ lib/Transforms/Utils/CmpInstAnalysis.cpp @@ -73,17 +73,17 @@ default: return false; case ICmpInst::ICMP_SLT: - // X < 0 is equivalent to (X & SignBit) != 0. + // X < 0 is equivalent to (X & SignMask) != 0. if (!C->isZero()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_NE; break; case ICmpInst::ICMP_SGT: - // X > -1 is equivalent to (X & SignBit) == 0. + // X > -1 is equivalent to (X & SignMask) == 0. if (!C->isAllOnesValue()) return false; - Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth())); + Y = ConstantInt::get(I->getContext(), APInt::getSignMask(C->getBitWidth())); Pred = ICmpInst::ICMP_EQ; break; case ICmpInst::ICMP_ULT: Index: lib/Transforms/Utils/CodeExtractor.cpp =================================================================== --- lib/Transforms/Utils/CodeExtractor.cpp +++ lib/Transforms/Utils/CodeExtractor.cpp @@ -73,24 +73,26 @@ } /// \brief Build a set of blocks to extract if the input blocks are viable. -template -static SetVector buildExtractionBlockSet(IteratorT BBBegin, - IteratorT BBEnd) { +static SetVector +buildExtractionBlockSet(ArrayRef BBs, DominatorTree *DT) { + assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); SetVector Result; - assert(BBBegin != BBEnd); - // Loop over the blocks, adding them to our set-vector, and aborting with an // empty set if we encounter invalid blocks. - do { - if (!Result.insert(*BBBegin)) - llvm_unreachable("Repeated basic blocks in extraction input"); + for (BasicBlock *BB : BBs) { - if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) { + // If this block is dead, don't process it. + if (DT && !DT->isReachableFromEntry(BB)) + continue; + + if (!Result.insert(BB)) + llvm_unreachable("Repeated basic blocks in extraction input"); + if (!CodeExtractor::isBlockValidForExtraction(*BB)) { Result.clear(); return Result; } - } while (++BBBegin != BBEnd); + } #ifndef NDEBUG for (SetVector::iterator I = std::next(Result.begin()), @@ -106,49 +108,19 @@ return Result; } -/// \brief Helper to call buildExtractionBlockSet with an ArrayRef. -static SetVector -buildExtractionBlockSet(ArrayRef BBs) { - return buildExtractionBlockSet(BBs.begin(), BBs.end()); -} - -/// \brief Helper to call buildExtractionBlockSet with a RegionNode. -static SetVector -buildExtractionBlockSet(const RegionNode &RN) { - if (!RN.isSubRegion()) - // Just a single BasicBlock. 
- return buildExtractionBlockSet(RN.getNodeAs()); - - const Region &R = *RN.getNodeAs(); - - return buildExtractionBlockSet(R.block_begin(), R.block_end()); -} - -CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs, - BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {} - CodeExtractor::CodeExtractor(ArrayRef BBs, DominatorTree *DT, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {} + BPI(BPI), Blocks(buildExtractionBlockSet(BBs, DT)), NumExitBlocks(~0U) {} CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())), + BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT)), NumExitBlocks(~0U) {} -CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN, - bool AggregateArgs, BlockFrequencyInfo *BFI, - BranchProbabilityInfo *BPI) - : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {} - /// definedInRegion - Return true if the specified value is defined in the /// extracted region. static bool definedInRegion(const SetVector &Blocks, Value *V) { @@ -218,9 +190,7 @@ // containing PHI nodes merging values from outside of the region, and a // second that contains all of the code for the block and merges back any // incoming values from inside of the region. - BasicBlock::iterator AfterPHIs = Header->getFirstNonPHI()->getIterator(); - BasicBlock *NewBB = Header->splitBasicBlock(AfterPHIs, - Header->getName()+".ce"); + BasicBlock *NewBB = llvm::SplitBlock(Header, Header->getFirstNonPHI(), DT); // We only want to code extract the second block now, and it becomes the new // header of the region. @@ -229,11 +199,6 @@ Blocks.insert(NewBB); Header = NewBB; - // Okay, update dominator sets. The blocks that dominate the new one are the - // blocks that dominate TIBB plus the new block itself. - if (DT) - DT->splitBlock(NewBB); - // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. if (NumPredsFromRegion) { @@ -248,6 +213,7 @@ // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. + BasicBlock::iterator AfterPHIs; for (AfterPHIs = OldPred->begin(); isa(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value Index: lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- lib/Transforms/Utils/SimplifyCFG.cpp +++ lib/Transforms/Utils/SimplifyCFG.cpp @@ -3055,6 +3055,15 @@ BasicBlock *QFB = QBI->getSuccessor(1); BasicBlock *PostBB = QFB->getSingleSuccessor(); + // Make sure we have a good guess for PostBB. If QTB's only successor is + // QFB, then QFB is a better PostBB. + if (QTB->getSingleSuccessor() == QFB) + PostBB = QFB; + + // If we couldn't find a good PostBB, stop. 
+ if (!PostBB) + return false; + bool InvertPCond = false, InvertQCond = false; // Canonicalize fallthroughs to the true branches. if (PFB == QBI->getParent()) { @@ -3079,8 +3088,7 @@ auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) { return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S; }; - if (!PostBB || - !HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || + if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) || !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB)) return false; if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) || Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7324,8 +7324,16 @@ return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector, VectorTy, VF - 1, VectorTy); - // TODO: IF-converted IFs become selects. - return 0; + // Phi nodes in non-header blocks (not inductions, reductions, etc.) are + // converted into select instructions. We require N - 1 selects per phi + // node, where N is the number of incoming values. + if (VF > 1 && Phi->getParent() != TheLoop->getHeader()) + return (Phi->getNumIncomingValues() - 1) * + TTI.getCmpSelInstrCost( + Instruction::Select, ToVectorTy(Phi->getType(), VF), + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + + return TTI.getCFInstrCost(Instruction::PHI); } case Instruction::UDiv: case Instruction::SDiv: Index: test/Analysis/ScalarEvolution/or-as-add.ll =================================================================== --- test/Analysis/ScalarEvolution/or-as-add.ll +++ /dev/null @@ -1,38 +0,0 @@ -; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s - -declare void @z(i32) -declare void @z2(i64) - -define void @fun(i1 %bool, i32 %x) { -entry: - br label %body -body: - %i = phi i32 [ 0, %entry ], [ %i.next, %body ] - %bottom_zero = mul i32 %i, 2 - %a = or i32 %bottom_zero, 1 - call void @z(i32 %a) - %bool_ext = zext i1 %bool to i32 - %b = or i32 %bool_ext, %bottom_zero - call void @z(i32 %b) - %shifted = lshr i32 %x, 31 - %c = or i32 %shifted, %bottom_zero - call void @z(i32 %c) - %i_ext = zext i32 %i to i64 - %d = or i64 %i_ext, 4294967296 - call void @z2(i64 %d) - %i.next = add i32 %i, 1 - %cond = icmp eq i32 %i.next, 10 - br i1 %cond, label %exit, label %body -exit: - ret void -} - -; CHECK: %a = or i32 %bottom_zero, 1 -; CHECK-NEXT: --> {1,+,2}<%body> -; CHECK: %b = or i32 %bool_ext, %bottom_zero -; CHECK-NEXT: --> {(zext i1 %bool to i32),+,2} -; CHECK: %c = or i32 %shifted, %bottom_zero -; CHECK-NEXT: --> {(%x /u -2147483648),+,2}<%body> -; CHECK: %d = or i64 %i_ext, 4294967296 -; CHECK-NEXT: --> {4294967296,+,1}<%body> - Index: test/CodeGen/AArch64/optimize-imm.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/optimize-imm.ll @@ -0,0 +1,64 @@ +; RUN: llc -o - %s -mtriple=aarch64-- | FileCheck %s + +; CHECK-LABEL: and1: +; CHECK: and {{w[0-9]+}}, w0, #0xfffffffd + +define void @and1(i32 %a, i8* nocapture %p) { +entry: + %and = and i32 %a, 253 + %conv = trunc i32 %and to i8 + store i8 %conv, i8* %p, align 1 + ret void +} + +; (a & 0x3dfd) | 0xffffc000 +; +; CHECK-LABEL: and2: +; CHECK: and {{w[0-9]+}}, w0, #0xfdfdfdfd + +define i32 @and2(i32 %a) { +entry: + %and = and i32 %a, 15869 + %or = or i32 %and, -16384 + ret i32 %or +} + +; (a & 0x19) | 0xffffffc0 +; +; CHECK-LABEL: and3: +; 
CHECK: and {{w[0-9]+}}, w0, #0x99999999 + +define i32 @and3(i32 %a) { +entry: + %and = and i32 %a, 25 + %or = or i32 %and, -64 + ret i32 %or +} + +; (a & 0xc5600) | 0xfff1f1ff +; +; CHECK-LABEL: and4: +; CHECK: and {{w[0-9]+}}, w0, #0xfffc07ff + +define i32 @and4(i32 %a) { +entry: + %and = and i32 %a, 787968 + %or = or i32 %and, -921089 + ret i32 %or +} + +; Make sure we don't shrink or optimize an XOR's immediate operand if the +; immediate is -1. Instruction selection turns (and ((xor $mask, -1), $v0)) into +; a BIC. + +; CHECK-LABEL: xor1: +; CHECK: orr [[R0:w[0-9]+]], wzr, #0x38 +; CHECK: bic {{w[0-9]+}}, [[R0]], w0, lsl #3 + +define i32 @xor1(i32 %a) { +entry: + %shl = shl i32 %a, 3 + %xor = and i32 %shl, 56 + %and = xor i32 %xor, 56 + ret i32 %and +} Index: test/CodeGen/AMDGPU/frame-index-amdgiz.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/frame-index-amdgiz.ll @@ -0,0 +1,55 @@ +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; +; The original OpenCL kernel: +; kernel void f(global int *a, int i, int j) { +; int x[100]; +; x[i] = 7; +; a[0] = x[j]; +; } +; clang -cc1 -triple amdgcn---amdgizcl -emit-llvm -o - + +target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5" +target triple = "amdgcn---amdgiz" + +define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 { +entry: +; CHECK: s_load_dword s2, s[0:1], 0xb +; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; CHECK: s_load_dword s0, s[0:1], 0xc +; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 +; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 +; CHECK: s_mov_b32 s10, -1 +; CHECK: s_waitcnt lgkmcnt(0) +; CHECK: s_lshl_b32 s1, s2, 2 +; CHECK: v_mov_b32_e32 v0, 4 +; CHECK: s_mov_b32 s11, 0xe8f000 +; CHECK: v_add_i32_e32 v1, vcc, s1, v0 +; CHECK: v_mov_b32_e32 v2, 7 +; CHECK: s_lshl_b32 s0, s0, 2 +; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen +; CHECK: v_add_i32_e32 v0, vcc, s0, v0 +; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen +; CHECK: s_mov_b32 s7, 0xf000 +; CHECK: s_mov_b32 s6, -1 +; CHECK: s_waitcnt vmcnt(0) +; CHECK: buffer_store_dword v0, off, s[4:7], 0 +; CHECK: s_endpgm + + %x = alloca [100 x i32], align 4, addrspace(5) + %0 = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)* + call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + %arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i + store i32 7, i32 addrspace(5)* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j + %1 = load i32, i32 addrspace(5)* %arrayidx2, align 4 + store i32 %1, i32 addrspace(1)* %a, align 4 + call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %0) #0 + ret void +} + +declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1 + +attributes #0 = { nounwind } +attributes #1 = { argmemonly nounwind } Index: test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir +++ test/CodeGen/ARM/GlobalISel/arm-instruction-select.mir @@ -5,6 +5,8 @@ define void @test_sext_s8() { ret void } define void @test_zext_s16() { ret void } + define void @test_trunc_s32_16() { ret void } + 
define void @test_add_s8() { ret void } define void @test_add_s16() { ret void } define void @test_add_s32() { ret void } @@ -142,6 +144,34 @@ ; CHECK: BX_RET 14, _, implicit %r0 ... --- +name: test_trunc_s32_16 +# CHECK-LABEL: name: test_trunc_s32_16 +legalized: true +regBankSelected: true +selected: false +# CHECK: selected: true +registers: + - { id: 0, class: gprb } + - { id: 1, class: gprb } +# CHECK-DAG: id: 0, class: gpr +# CHECK-DAG: id: 1, class: gpr +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + ; CHECK: [[VREGX:%[0-9]+]] = COPY %r0 + + %1(s16) = G_TRUNC %0(s32) + ; CHECK: [[VREGTRUNC:%[0-9]+]] = COPY [[VREGX]] + + %r0 = COPY %1(s16) + ; CHECK: %r0 = COPY [[VREGTRUNC]] + + BX_RET 14, _, implicit %r0 + ; CHECK: BX_RET 14, _, implicit %r0 +... +--- name: test_add_s8 # CHECK-LABEL: name: test_add_s8 legalized: true Index: test/CodeGen/ARM/GlobalISel/arm-isel.ll =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-isel.ll +++ test/CodeGen/ARM/GlobalISel/arm-isel.ll @@ -40,6 +40,30 @@ ret i16 %x } +define void @test_trunc_i32_i16(i32 %v, i16 *%p) { +; CHECK-LABEL: test_trunc_i32_i16: +; The trunc doesn't result in any instructions, but we +; expect the store to be explicitly 16-bit. +; CHECK: strh r0, [r1] +; CHECK: bx lr +entry: + %v16 = trunc i32 %v to i16 + store i16 %v16, i16 *%p + ret void +} + +define void @test_trunc_i32_i8(i32 %v, i8 *%p) { +; CHECK-LABEL: test_trunc_i32_i8: +; The trunc doesn't result in any instructions, but we +; expect the store to be explicitly 8-bit. +; CHECK: strb r0, [r1] +; CHECK: bx lr +entry: + %v8 = trunc i32 %v to i8 + store i8 %v8, i8 *%p + ret void +} + define i8 @test_add_i8(i8 %x, i8 %y) { ; CHECK-LABEL: test_add_i8: ; CHECK: add r0, r0, r1 Index: test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir =================================================================== --- test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir +++ test/CodeGen/ARM/GlobalISel/arm-regbankselect.mir @@ -22,6 +22,8 @@ define void @test_constants() { ret void } + define void @test_trunc_s32_16() { ret void } + define void @test_fadd_s32() #0 { ret void } define void @test_fadd_s64() #0 { ret void } @@ -442,6 +444,27 @@ BX_RET 14, _, implicit %r0 ... --- +name: test_trunc_s32_16 +# CHECK-LABEL: name: test_trunc_s32_16 +legalized: true +regBankSelected: false +selected: false +# CHECK: registers: +# CHECK: - { id: 0, class: gprb } +# CHECK: - { id: 1, class: gprb } +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } +body: | + bb.0: + liveins: %r0 + + %0(s32) = COPY %r0 + %1(s16) = G_TRUNC %0(s32) + %r0 = COPY %1(s16) + BX_RET 14, _, implicit %r0 +... +--- name: test_fadd_s32 # CHECK-LABEL: name: test_fadd_s32 legalized: true Index: test/CodeGen/ARM/GlobalISel/arm-unsupported.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/GlobalISel/arm-unsupported.ll @@ -0,0 +1,80 @@ +; RUN: llc -mtriple arm-unknown -verify-machineinstrs -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +; This file checks that we use the fallback path for things that are known to +; be unsupported on the ARM target. It should progressively shrink in size. 
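+;
+; With -global-isel-abort=2 the selector does not abort on an unsupported
+; construct: it emits a remark and falls back to SelectionDAG, which is what
+; the remark and fallback-warning CHECK lines below verify.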
+ +define <4 x i32> @test_int_vectors(<4 x i32> %a, <4 x i32> %b) { +; CHECK: remark: {{.*}} unable to lower arguments: <4 x i32> (<4 x i32>, <4 x i32>)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_int_vectors + %res = add <4 x i32> %a, %b + ret <4 x i32> %res +} + +define <4 x float> @test_float_vectors(<4 x float> %a, <4 x float> %b) { +; CHECK: remark: {{.*}} unable to lower arguments: <4 x float> (<4 x float>, <4 x float>)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_float_vectors + %res = fadd <4 x float> %a, %b + ret <4 x float> %res +} + +define i64 @test_i64(i64 %a, i64 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i64 (i64, i64)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_i64 + %res = add i64 %a, %b + ret i64 %res +} + +define i128 @test_i128(i128 %a, i128 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i128 (i128, i128)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_i128 + %res = add i128 %a, %b + ret i128 %res +} + +define i17 @test_funny_ints(i17 %a, i17 %b) { +; CHECK: remark: {{.*}} unable to lower arguments: i17 (i17, i17)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_funny_ints + %res = add i17 %a, %b + ret i17 %res +} + +define half @test_half(half %a, half %b) { +; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_half + %res = fadd half %a, %b + ret half %res +} + +; On ARM, clang lowers structs to arrays. +define void @test_arrays([2 x i32] %this.could.come.from.a.struct) { +; CHECK: remark: {{.*}} unable to lower arguments: void ([2 x i32])* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_arrays + ret void +} + +define void @test_structs({i32, i32} %struct) { +; CHECK: remark: {{.*}} unable to lower arguments: void ({ i32, i32 })* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_structs + ret void +} + +define void @test_vararg_definition(i32 %a, ...) { +; CHECK: remark: {{.*}} unable to lower arguments: void (i32, ...)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_vararg_definition + ret void +} + +define void @test_vararg_call(i32 %a) { +; CHECK: remark: {{.*}} unable to translate instruction: call +; CHECK-LABEL: warning: Instruction selection used fallback path for test_vararg_call + call void(i32, ...) @test_vararg_definition(i32 %a, i32 %a, i32 %a) + ret void +} + +define i32 @test_thumb(i32 %a) #0 { +; CHECK: remark: {{.*}} unable to lower arguments: i32 (i32)* +; CHECK-LABEL: warning: Instruction selection used fallback path for test_thumb + ret i32 %a +} + +attributes #0 = { "target-features"="+thumb-mode" } Index: test/CodeGen/ARM/alloc-no-stack-realign.ll =================================================================== --- test/CodeGen/ARM/alloc-no-stack-realign.ll +++ test/CodeGen/ARM/alloc-no-stack-realign.ll @@ -7,31 +7,32 @@ define void @test1(<16 x float>* noalias sret %agg.result) nounwind ssp "no-realign-stack" { entry: -; CHECK-LABEL: test1 -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r1, #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
-; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test1: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 %0 = load <16 x float>, <16 x float>* @T3_retval, align 16 store <16 x float> %0, <16 x float>* %retval @@ -42,30 +43,32 @@ define void @test2(<16 x float>* noalias sret %agg.result) nounwind ssp { entry: -; CHECK: ldr r[[R1:[0-9]+]], [pc, r1] -; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: mov r[[R2:[0-9]+]], r[[R1]] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: mov r[[R1:[0-9]+]], sp -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: orr r[[R2:[0-9]+]], r[[R1]], #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128]! 
-; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R2]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #48 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: add r[[R1:[0-9]+]], r0, #32 -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r[[R1]]:128] -; CHECK: vst1.32 {{{d[0-9]+, d[0-9]+}}}, [r0:128]! -; CHECK: vst1.64 {{{d[0-9]+, d[0-9]+}}}, [r0:128] +; CHECK-LABEL: test2: +; CHECK: ldr r[[R1:[0-9]+]], [pc, r[[R1]]] +; CHECK: mov r[[R2:[0-9]+]], r[[R1]] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R2:[0-9]+]], r[[R1]], #48 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r[[R1]], #32 +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: mov r[[R1:[0-9]+]], #32 +; CHECK: mov r[[R2:[0-9]+]], sp +; CHECK: mov r[[R3:[0-9]+]], r[[R2]] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128], r[[R1]] +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R3]]:128] +; CHECK: vld1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R2]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #48 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: add r[[R1:[0-9]+]], r0, #32 +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r[[R1]]:128] +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128]! +; CHECK: vst1.64 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0:128] %retval = alloca <16 x float>, align 16 Index: test/CodeGen/ARM/fence-singlethread.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/fence-singlethread.ll @@ -0,0 +1,16 @@ +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-apple-ios %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-linux-gnueabihf %s -filetype=obj -o %t +; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=OBJ + +; OBJ-NOT: dmb + +define void @fence_singlethread() { +; CHECK-LABEL: fence_singlethread: +; CHECK-NOT: dmb +; CHECK: @ COMPILER BARRIER +; CHECK-NOT: dmb + + fence singlethread seq_cst + ret void +} Index: test/CodeGen/ARM/memcpy-inline.ll =================================================================== --- test/CodeGen/ARM/memcpy-inline.ll +++ test/CodeGen/ARM/memcpy-inline.ll @@ -30,10 +30,9 @@ define void @t1(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t1: -; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] -; CHECK: adds r0, #15 -; CHECK: adds r1, #15 +; CHECK: movs [[INC:r[0-9]+]], #15 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1], [[INC]] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) @@ -43,13 +42,15 @@ define void @t2(i8* nocapture %C) nounwind { entry: ; CHECK-LABEL: t2: +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! 
+; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: add.w r3, r0, #16 +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: movw [[REG2:r[0-9]+]], #16716 ; CHECK: movt [[REG2:r[0-9]+]], #72 -; CHECK: str [[REG2]], [r0, #32] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1]! -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! +; CHECK: str [[REG2]], [r0] ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) ret void } Index: test/CodeGen/ARM/memset-inline.ll =================================================================== --- test/CodeGen/ARM/memset-inline.ll +++ test/CodeGen/ARM/memset-inline.ll @@ -13,10 +13,10 @@ define void @t2() nounwind ssp { entry: ; CHECK-LABEL: t2: -; CHECK: add.w r1, r0, #10 ; CHECK: vmov.i32 {{q[0-9]+}}, #0x0 -; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK: movs r1, #10 +; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1 +; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) Index: test/CodeGen/ARM/vector-load.ll =================================================================== --- test/CodeGen/ARM/vector-load.ll +++ test/CodeGen/ARM/vector-load.ll @@ -253,11 +253,22 @@ } ; CHECK-LABEL: test_silly_load: -; CHECK: ldr {{r[0-9]+}}, [r0, #24] -; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128]! -; CHECK: vldr d{{[0-9]+}}, [r0] +; CHECK: vldr d{{[0-9]+}}, [r0, #16] +; CHECK: movs r1, #24 +; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0:128], r1 +; CHECK: ldr {{r[0-9]+}}, [r0] define void @test_silly_load(<28 x i8>* %addr) { load volatile <28 x i8>, <28 x i8>* %addr ret void } + +define <4 x i32>* @test_vld1_immoffset(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vld1_immoffset: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vld1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r0], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.in, i32 2 + ret <4 x i32>* %next +} Index: test/CodeGen/ARM/vector-store.ll =================================================================== --- test/CodeGen/ARM/vector-store.ll +++ test/CodeGen/ARM/vector-store.ll @@ -256,3 +256,13 @@ store <4 x i8>* %inc, <4 x i8>** %ptr ret void } + +define <4 x i32>* @test_vst1_1reg(<4 x i32>* %ptr.in, <4 x i32>* %ptr.out) { +; CHECK-LABEL: test_vst1_1reg: +; CHECK: movs [[INC:r[0-9]+]], #32 +; CHECK: vst1.32 {{{d[0-9]+}}, {{d[0-9]+}}}, [r1], [[INC]] + %val = load <4 x i32>, <4 x i32>* %ptr.in + store <4 x i32> %val, <4 x i32>* %ptr.out + %next = getelementptr <4 x i32>, <4 x i32>* %ptr.out, i32 2 + ret <4 x i32>* %next +} Index: test/CodeGen/ARM/vlddup.ll =================================================================== --- test/CodeGen/ARM/vlddup.ll +++ test/CodeGen/ARM/vlddup.ll @@ -310,6 +310,23 @@ ret <4 x i16> %tmp5 } +define <4 x i16> @vld2dupi16_odd_update(i16** %ptr) nounwind { +;CHECK-LABEL: vld2dupi16_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #6 +;CHECK: vld2.16 {d16[], d17[]}, [r1], [[INC]] + %A = load i16*, i16** %ptr + %A2 = bitcast i16* %A to i8* + %tmp0 = tail call %struct.__neon_int4x16x2_t 
@llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2) + %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0 + %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1 + %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer + %tmp5 = add <4 x i16> %tmp2, %tmp4 + %tmp6 = getelementptr i16, i16* %A, i32 3 + store i16* %tmp6, i16** %ptr + ret <4 x i16> %tmp5 +} + define <2 x i32> @vld2dupi32(i8* %A) nounwind { ;CHECK-LABEL: vld2dupi32: ;Check the alignment value. Max for this instruction is 64 bits: Index: test/CodeGen/ARM/vldlane.ll =================================================================== --- test/CodeGen/ARM/vldlane.ll +++ test/CodeGen/ARM/vldlane.ll @@ -150,6 +150,22 @@ ret <2 x i32> %tmp5 } +define <2 x i32> @vld2lanei32_odd_update(i32** %ptr, <2 x i32>* %B) nounwind { +;CHECK-LABEL: vld2lanei32_odd_update: +;CHECK: mov [[INC:r[0-9]+]], #12 +;CHECK: vld2.32 {d16[1], d17[1]}, [{{r[0-9]+}}], [[INC]] + %A = load i32*, i32** %ptr + %tmp0 = bitcast i32* %A to i8* + %tmp1 = load <2 x i32>, <2 x i32>* %B + %tmp2 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1) + %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 0 + %tmp4 = extractvalue %struct.__neon_int32x2x2_t %tmp2, 1 + %tmp5 = add <2 x i32> %tmp3, %tmp4 + %tmp6 = getelementptr i32, i32* %A, i32 3 + store i32* %tmp6, i32** %ptr + ret <2 x i32> %tmp5 +} + define <2 x float> @vld2lanef(float* %A, <2 x float>* %B) nounwind { ;CHECK-LABEL: vld2lanef: ;CHECK: vld2.32 Index: test/CodeGen/ARM/vpadd.ll =================================================================== --- test/CodeGen/ARM/vpadd.ll +++ test/CodeGen/ARM/vpadd.ll @@ -485,6 +485,17 @@ ret <2 x i16> %x } +; And <2 x i8> to <2 x i32> +define <2 x i8> @fromExtendingExtractVectorElt_2i8(<8 x i8> %in) { +; CHECK-LABEL: fromExtendingExtractVectorElt_2i8: +; CHECK: vadd.i32 + %tmp1 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> + %tmp2 = shufflevector <8 x i8> %in, <8 x i8> undef, <2 x i32> + %x = add <2 x i8> %tmp2, %tmp1 + ret <2 x i8> %x +} + + declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone Index: test/CodeGen/Thumb/long.ll =================================================================== --- test/CodeGen/Thumb/long.ll +++ test/CodeGen/Thumb/long.ll @@ -206,3 +206,34 @@ ; CHECK: adds r0, r0, r2 ; CHECK: sbcs r1, r3 } + +declare void @f13(i64 %x) + +define void @f14(i1 %x, i64 %y) #0 { +; CHECK-LABEL: f14: +entry: + %a = add i64 %y, 47 + call void @f13(i64 %a) +; CHECK: bl + br i1 %x, label %if.end, label %if.then + +if.then: + call void @f13(i64 %y) +; CHECK: bl + br label %if.end + +if.end: + %b = add i64 %y, 45 + call void @f13(i64 %b) +; CHECK: adds +; CHECK: adcs +; CHECK: bl + %c = add i64 %y, 47 + call void @f13(i64 %c) +; CHECK: adds +; CHECK-NEXT: adcs +; CHECK: bl + ret void +} + +attributes #0 = { optsize } Index: test/CodeGen/X86/constant-hoisting-bfi.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/constant-hoisting-bfi.ll @@ -0,0 +1,115 @@ +; RUN: opt -consthoist -mtriple=x86_64-unknown-linux-gnu -consthoist-with-block-frequency=true -S < %s | FileCheck %s + +target 
datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Check when BFI is enabled for constant hoisting, constant 214748364701 +; will not be hoisted to the func entry. +; CHECK-LABEL: @foo( +; CHECK: entry: +; CHECK-NOT: bitcast i64 214748364701 to i64 +; CHECK: if.then: + +; Function Attrs: norecurse nounwind uwtable +define i64 @foo(i64* nocapture %a) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %a, i64 9 + %t0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp slt i64 %t0, 564 + br i1 %cmp, label %if.then, label %if.else5 + +if.then: ; preds = %entry + %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5 + %t1 = load i64, i64* %arrayidx1, align 8 + %cmp2 = icmp slt i64 %t1, 1009 + br i1 %cmp2, label %if.then3, label %return + +if.then3: ; preds = %if.then + %arrayidx4 = getelementptr inbounds i64, i64* %a, i64 6 + %t2 = load i64, i64* %arrayidx4, align 8 + %inc = add nsw i64 %t2, 1 + store i64 %inc, i64* %arrayidx4, align 8 + br label %return + +if.else5: ; preds = %entry + %arrayidx6 = getelementptr inbounds i64, i64* %a, i64 6 + %t3 = load i64, i64* %arrayidx6, align 8 + %cmp7 = icmp slt i64 %t3, 3512 + br i1 %cmp7, label %if.then8, label %return + +if.then8: ; preds = %if.else5 + %arrayidx9 = getelementptr inbounds i64, i64* %a, i64 7 + %t4 = load i64, i64* %arrayidx9, align 8 + %inc10 = add nsw i64 %t4, 1 + store i64 %inc10, i64* %arrayidx9, align 8 + br label %return + +return: ; preds = %if.else5, %if.then, %if.then8, %if.then3 + %retval.0 = phi i64 [ 214748364701, %if.then3 ], [ 214748364701, %if.then8 ], [ 250148364702, %if.then ], [ 256148364704, %if.else5 ] + ret i64 %retval.0 +} + +; Check when BFI is enabled for constant hoisting, constant 214748364701 +; in while.body will be hoisted to while.body.preheader. 214748364701 in +; if.then16 and if.else10 will be merged and hoisted to the beginning of +; if.else10 because if.else10 dominates if.then16. 
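+; This split is profitable because the summed block frequency of
+; while.body.preheader and if.else10 is below that of the entry block, and
+; the preheader copy executes once per loop entry rather than once per
+; iteration of while.body.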
+; CHECK-LABEL: @goo( +; CHECK: entry: +; CHECK-NOT: bitcast i64 214748364701 to i64 +; CHECK: while.body.preheader: +; CHECK-NEXT: bitcast i64 214748364701 to i64 +; CHECK-NOT: bitcast i64 214748364701 to i64 +; CHECK: if.else10: +; CHECK-NEXT: bitcast i64 214748364701 to i64 +; CHECK-NOT: bitcast i64 214748364701 to i64 +define i64 @goo(i64* nocapture %a) { +entry: + %arrayidx = getelementptr inbounds i64, i64* %a, i64 9 + %t0 = load i64, i64* %arrayidx, align 8 + %cmp = icmp ult i64 %t0, 56 + br i1 %cmp, label %if.then, label %if.else10, !prof !0 + +if.then: ; preds = %entry + %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 5 + %t1 = load i64, i64* %arrayidx1, align 8 + %cmp2 = icmp ult i64 %t1, 10 + br i1 %cmp2, label %while.cond.preheader, label %return, !prof !0 + +while.cond.preheader: ; preds = %if.then + %arrayidx7 = getelementptr inbounds i64, i64* %a, i64 6 + %t2 = load i64, i64* %arrayidx7, align 8 + %cmp823 = icmp ugt i64 %t2, 10000 + br i1 %cmp823, label %while.body.preheader, label %return + +while.body.preheader: ; preds = %while.cond.preheader + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %t3 = phi i64 [ %add, %while.body ], [ %t2, %while.body.preheader ] + %add = add i64 %t3, 214748364701 + %cmp8 = icmp ugt i64 %add, 10000 + br i1 %cmp8, label %while.body, label %while.cond.return.loopexit_crit_edge + +if.else10: ; preds = %entry + %arrayidx11 = getelementptr inbounds i64, i64* %a, i64 6 + %t4 = load i64, i64* %arrayidx11, align 8 + %add2 = add i64 %t4, 214748364701 + %cmp12 = icmp ult i64 %add2, 35 + br i1 %cmp12, label %if.then16, label %return, !prof !0 + +if.then16: ; preds = %if.else10 + %arrayidx17 = getelementptr inbounds i64, i64* %a, i64 7 + %t5 = load i64, i64* %arrayidx17, align 8 + %inc = add i64 %t5, 1 + store i64 %inc, i64* %arrayidx17, align 8 + br label %return + +while.cond.return.loopexit_crit_edge: ; preds = %while.body + store i64 %add, i64* %arrayidx7, align 8 + br label %return + +return: ; preds = %while.cond.preheader, %while.cond.return.loopexit_crit_edge, %if.else10, %if.then, %if.then16 + %retval.0 = phi i64 [ 214748364701, %if.then16 ], [ 0, %if.then ], [ 0, %if.else10 ], [ 0, %while.cond.return.loopexit_crit_edge ], [ 0, %while.cond.preheader ] + ret i64 %retval.0 +} + +!0 = !{!"branch_weights", i32 1, i32 2000} Index: test/CodeGen/X86/fold-tied-op.ll =================================================================== --- test/CodeGen/X86/fold-tied-op.ll +++ test/CodeGen/X86/fold-tied-op.ll @@ -7,7 +7,6 @@ ; CHECK-LABEL: fn1 ; CHECK: addl {{.*#+}} 4-byte Folded Reload -; CHECK: addl {{.*#+}} 4-byte Folded Reload ; CHECK: imull {{.*#+}} 4-byte Folded Reload ; CHECK: orl {{.*#+}} 4-byte Folded Reload ; CHECK: retl Index: test/CodeGen/X86/memcpy-struct-by-value.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/memcpy-struct-by-value.ll @@ -0,0 +1,48 @@ +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=-ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; RUN: llc -mtriple=i686-linux-gnu -mattr=-ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST32 +; RUN: llc -mtriple=i686-linux-gnu -mattr=+ermsb < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=generic < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST +; RUN: llc 
-mtriple=x86_64-linux-gnu -mcpu=haswell < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=skylake < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST +; FIXME: The documentation states that ivybridge has ermsb, but this is not +; enabled right now since I could not confirm by testing. +; RUN: llc -mtriple=x86_64-linux-gnu -mcpu=ivybridge < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST + +%struct.large = type { [4096 x i8] } + +declare void @foo(%struct.large* align 8 byval) nounwind + +define void @test1(%struct.large* nocapture %x) nounwind { + call void @foo(%struct.large* align 8 byval %x) + ret void + +; ALL-LABEL: test1: +; NOFAST: rep;movsq +; NOFAST32: rep;movsl +; FAST: rep;movsb +} + +define void @test2(%struct.large* nocapture %x) nounwind minsize { + call void @foo(%struct.large* align 8 byval %x) + ret void + +; ALL-LABEL: test2: +; NOFAST: rep;movsq +; NOFAST32: rep;movsl +; FAST: rep;movsb +} + +%struct.large_oddsize = type { [4095 x i8] } + +declare void @foo_oddsize(%struct.large_oddsize* align 8 byval) nounwind + +define void @test3(%struct.large_oddsize* nocapture %x) nounwind minsize { + call void @foo_oddsize(%struct.large_oddsize* align 8 byval %x) + ret void + +; ALL-LABEL: test3: +; NOFAST: rep;movsb +; NOFAST32: rep;movsb +; FAST: rep;movsb +} Index: test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll =================================================================== --- test/DebugInfo/AMDGPU/pointer-address-space-dwarf-v1.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck %s - -; LLVM IR generated with the following command and OpenCL source: -; -; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm -; -; kernel void kernel1() { -; global int *FuncVar0 = 0; -; constant int *FuncVar1 = 0; -; local int *FuncVar2 = 0; -; private int *FuncVar3 = 0; -; int *FuncVar4 = 0; -; } - -; DW_AT_address_class is available since Dwarf Version 2. 
-; CHECK-NOT: DW_AT_address_class - -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -define amdgpu_kernel void @kernel1() #0 !dbg !7 { -entry: - %FuncVar0 = alloca i32 addrspace(1)*, align 4 - %FuncVar1 = alloca i32 addrspace(2)*, align 4 - %FuncVar2 = alloca i32 addrspace(3)*, align 4 - %FuncVar3 = alloca i32*, align 4 - %FuncVar4 = alloca i32 addrspace(4)*, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)** %FuncVar0, metadata !10, metadata !13), !dbg !14 - store i32 addrspace(1)* null, i32 addrspace(1)** %FuncVar0, align 4, !dbg !14 - call void @llvm.dbg.declare(metadata i32 addrspace(2)** %FuncVar1, metadata !15, metadata !13), !dbg !16 - store i32 addrspace(2)* null, i32 addrspace(2)** %FuncVar1, align 4, !dbg !16 - call void @llvm.dbg.declare(metadata i32 addrspace(3)** %FuncVar2, metadata !17, metadata !13), !dbg !19 - store i32 addrspace(3)* addrspacecast (i32 addrspace(4)* null to i32 addrspace(3)*), i32 addrspace(3)** %FuncVar2, align 4, !dbg !19 - call void @llvm.dbg.declare(metadata i32** %FuncVar3, metadata !20, metadata !13), !dbg !22 - store i32* addrspacecast (i32 addrspace(4)* null to i32*), i32** %FuncVar3, align 4, !dbg !22 - call void @llvm.dbg.declare(metadata i32 addrspace(4)** %FuncVar4, metadata !23, metadata !13), !dbg !24 - store i32 addrspace(4)* null, i32 addrspace(4)** %FuncVar4, align 4, !dbg !24 - ret void, !dbg !25 -} - -!llvm.dbg.cu = !{!0} -!opencl.ocl.version = !{!3} -!llvm.module.flags = !{!4, !5} -!llvm.ident = !{!6} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "pointer-address-space-dwarf-v1.cl", directory: "/some/random/directory") -!2 = !{} -!3 = !{i32 2, i32 0} -!4 = !{i32 2, !"Dwarf Version", i32 1} -!5 = !{i32 2, !"Debug Info Version", i32 3} -!6 = !{!""} -!7 = distinct !DISubprogram(name: "kernel1", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: !2) -!8 = !DISubroutineType(types: !9) -!9 = !{null} -!10 = !DILocalVariable(name: "FuncVar0", scope: !7, file: !1, line: 2, type: !11) -!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) -!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!13 = !DIExpression() -!14 = !DILocation(line: 2, column: 15, scope: !7) -!15 = !DILocalVariable(name: "FuncVar1", scope: !7, file: !1, line: 3, type: !11) -!16 = !DILocation(line: 3, column: 17, scope: !7) -!17 = !DILocalVariable(name: "FuncVar2", scope: !7, file: !1, line: 4, type: !18) -!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32, dwarfAddressSpace: 2) -!19 = !DILocation(line: 4, column: 14, scope: !7) -!20 = !DILocalVariable(name: "FuncVar3", scope: !7, file: !1, line: 5, type: !21) -!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 32, dwarfAddressSpace: 1) -!22 = !DILocation(line: 5, column: 16, scope: !7) -!23 = !DILocalVariable(name: "FuncVar4", scope: !7, file: !1, line: 6, type: !11) -!24 = !DILocation(line: 6, column: 8, scope: !7) -!25 = !DILocation(line: 7, column: 1, scope: !7) Index: test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll =================================================================== --- test/DebugInfo/AMDGPU/variable-locations-dwarf-v1.ll +++ /dev/null @@ -1,92 +0,0 @@ -; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | 
FileCheck %s - -; LLVM IR generated with the following command and OpenCL source: -; -; $clang -cl-std=CL2.0 -g -O0 -target amdgcn-amd-amdhsa -S -emit-llvm -; -; global int GlobA; -; global int GlobB; -; -; kernel void kernel1(unsigned int ArgN, global int *ArgA, global int *ArgB) { -; ArgA[ArgN] += ArgB[ArgN]; -; } - -declare void @llvm.dbg.declare(metadata, metadata, metadata) - -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 ) -@GlobA = common addrspace(1) global i32 0, align 4, !dbg !0 -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x05> 03 00 00 00 00 ) -@GlobB = common addrspace(1) global i32 0, align 4, !dbg !6 - -define amdgpu_kernel void @kernel1( -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 04 10 01 16 18 ) - i32 %ArgN, -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 08 10 01 16 18 ) - i32 addrspace(1)* %ArgA, -; CHECK-NOT: DW_AT_location [DW_FORM_block1] (<0x06> 91 10 10 01 16 18 ) - i32 addrspace(1)* %ArgB) !dbg !13 { -entry: - %ArgN.addr = alloca i32, align 4 - %ArgA.addr = alloca i32 addrspace(1)*, align 4 - %ArgB.addr = alloca i32 addrspace(1)*, align 4 - store i32 %ArgN, i32* %ArgN.addr, align 4 - call void @llvm.dbg.declare(metadata i32* %ArgN.addr, metadata !22, metadata !23), !dbg !24 - store i32 addrspace(1)* %ArgA, i32 addrspace(1)** %ArgA.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgA.addr, metadata !25, metadata !23), !dbg !26 - store i32 addrspace(1)* %ArgB, i32 addrspace(1)** %ArgB.addr, align 4 - call void @llvm.dbg.declare(metadata i32 addrspace(1)** %ArgB.addr, metadata !27, metadata !23), !dbg !28 - %0 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgB.addr, align 4, !dbg !29 - %1 = load i32, i32* %ArgN.addr, align 4, !dbg !30 - %idxprom = zext i32 %1 to i64, !dbg !29 - %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i64 %idxprom, !dbg !29 - %2 = load i32, i32 addrspace(1)* %arrayidx, align 4, !dbg !29 - %3 = load i32 addrspace(1)*, i32 addrspace(1)** %ArgA.addr, align 4, !dbg !31 - %4 = load i32, i32* %ArgN.addr, align 4, !dbg !32 - %idxprom1 = zext i32 %4 to i64, !dbg !31 - %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %3, i64 %idxprom1, !dbg !31 - %5 = load i32, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33 - %add = add nsw i32 %5, %2, !dbg !33 - store i32 %add, i32 addrspace(1)* %arrayidx2, align 4, !dbg !33 - ret void, !dbg !34 -} - -!llvm.dbg.cu = !{!2} -!opencl.ocl.version = !{!9} -!llvm.module.flags = !{!10, !11} -!llvm.ident = !{!12} - -!0 = !DIGlobalVariableExpression(var: !1) -!1 = distinct !DIGlobalVariable(name: "GlobA", scope: !2, file: !3, line: 1, type: !8, isLocal: false, isDefinition: true) -!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5) -!3 = !DIFile(filename: "variable-locations-dwarf-v1.cl", directory: "/some/random/directory") -!4 = !{} -!5 = !{!0, !6} -!6 = !DIGlobalVariableExpression(var: !7) -!7 = distinct !DIGlobalVariable(name: "GlobB", scope: !2, file: !3, line: 2, type: !8, isLocal: false, isDefinition: true) -!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!9 = !{i32 2, i32 0} -!10 = !{i32 2, !"Dwarf Version", i32 1} -!11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{!"clang version 5.0.0"} -!13 = distinct !DISubprogram(name: "kernel1", scope: !3, file: !3, line: 4, type: !14, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, 
unit: !2, variables: !4) -!14 = !DISubroutineType(types: !15) -!15 = !{null, !16, !17, !17} -!16 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) -!17 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) -!18 = !{i32 0, i32 1, i32 1} -!19 = !{!"none", !"none", !"none"} -!20 = !{!"uint", !"int*", !"int*"} -!21 = !{!"", !"", !""} -!22 = !DILocalVariable(name: "ArgN", arg: 1, scope: !13, file: !3, line: 4, type: !16) -!23 = !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef) -!24 = !DILocation(line: 4, column: 34, scope: !13) -!25 = !DILocalVariable(name: "ArgA", arg: 2, scope: !13, file: !3, line: 4, type: !17) -!26 = !DILocation(line: 4, column: 52, scope: !13) -!27 = !DILocalVariable(name: "ArgB", arg: 3, scope: !13, file: !3, line: 4, type: !17) -!28 = !DILocation(line: 4, column: 70, scope: !13) -!29 = !DILocation(line: 5, column: 17, scope: !13) -!30 = !DILocation(line: 5, column: 22, scope: !13) -!31 = !DILocation(line: 5, column: 3, scope: !13) -!32 = !DILocation(line: 5, column: 8, scope: !13) -!33 = !DILocation(line: 5, column: 14, scope: !13) -!34 = !DILocation(line: 6, column: 1, scope: !13) Index: test/DebugInfo/X86/dw_op_minus_direct.ll =================================================================== --- test/DebugInfo/X86/dw_op_minus_direct.ll +++ test/DebugInfo/X86/dw_op_minus_direct.ll @@ -1,11 +1,20 @@ ; Test dwarf codegen of DW_OP_minus. ; RUN: llc -filetype=obj < %s | llvm-dwarfdump - | FileCheck %s +; RUN: llc -dwarf-version=2 -filetype=obj < %s | llvm-dwarfdump - \ +; RUN: | FileCheck %s --check-prefix=DWARF2 +; RUN: llc -dwarf-version=3 -filetype=obj < %s | llvm-dwarfdump - \ +; RUN: | FileCheck %s --check-prefix=DWARF2 ; This was derived manually from: ; int inc(int i) { ; return i+1; ; } +; DWARF2: .debug_info +; DWARF2: DW_TAG_formal_parameter +; DWARF2-NEXT: DW_AT_name {{.*}}"i" +; DWARF2-NOT: DW_AT_location + ; CHECK: Beginning address offset: 0x0000000000000000 ; CHECK: Ending address offset: 0x0000000000000004 ; CHECK: Location description: 70 00 10 ff ff ff ff 0f 1a 10 01 1c 9f Index: test/TableGen/GlobalISelEmitter.td =================================================================== --- test/TableGen/GlobalISelEmitter.td +++ test/TableGen/GlobalISelEmitter.td @@ -29,7 +29,31 @@ def Z : OperandWithDefaultOps ; def m1Z : OperandWithDefaultOps ; -//===- Test the function definition boilerplate. --------------------------===// +def HasA : Predicate<"Subtarget->hasA()">; +def HasB : Predicate<"Subtarget->hasB()">; + +//===- Test the function boilerplate. 
-------------------------------------===// + +// CHECK-LABEL: enum SubtargetFeatureBits : uint8_t { +// CHECK-NEXT: Feature_HasABit = 0, +// CHECK-NEXT: Feature_HasBBit = 1, +// CHECK-NEXT: }; + +// CHECK-LABEL: static const char *SubtargetFeatureNames[] = { +// CHECK-NEXT: "Feature_HasA", +// CHECK-NEXT: "Feature_HasB", +// CHECK-NEXT: nullptr +// CHECK-NEXT: }; + +// CHECK-LABEL: PredicateBitset MyTargetInstructionSelector:: +// CHECK-NEXT: computeAvailableFeatures(const MachineFunction *MF, const MyTargetSubtarget *Subtarget) const { +// CHECK-NEXT: PredicateBitset Features; +// CHECK-NEXT: if (Subtarget->hasA()) +// CHECK-NEXT: Features[Feature_HasABit] = 1; +// CHECK-NEXT: if (Subtarget->hasB()) +// CHECK-NEXT: Features[Feature_HasBBit] = 1; +// CHECK-NEXT: return Features; +// CHECK-NEXT: } // CHECK: bool MyTargetInstructionSelector::selectImpl(MachineInstr &I) const { // CHECK: MachineFunction &MF = *I.getParent()->getParent(); @@ -103,6 +127,9 @@ //===- Test a nested instruction match. -----------------------------------===// // CHECK-LABEL: if ([&]() { +// CHECK-NEXT: PredicateBitset ExpectedFeatures = {Feature_HasABit}; +// CHECK-NEXT: if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures) +// CHECK-NEXT: return false; // CHECK-NEXT: MachineInstr &MI0 = I; // CHECK-NEXT: if (MI0.getNumOperands() < 3) // CHECK-NEXT: return false; @@ -142,6 +169,9 @@ // We also get a second rule by commutativity. // CHECK-LABEL: if ([&]() { +// CHECK-NEXT: PredicateBitset ExpectedFeatures = {Feature_HasABit}; +// CHECK-NEXT: if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures) +// CHECK-NEXT: return false; // CHECK-NEXT: MachineInstr &MI0 = I; // CHECK-NEXT: if (MI0.getNumOperands() < 3) // CHECK-NEXT: return false; @@ -181,11 +211,15 @@ def MULADD : I<(outs GPR32:$dst), (ins GPR32:$src1, GPR32:$src2, GPR32:$src3), [(set GPR32:$dst, - (mul (add GPR32:$src1, GPR32:$src2), GPR32:$src3))]>; + (mul (add GPR32:$src1, GPR32:$src2), GPR32:$src3))]>, + Requires<[HasA]>; //===- Test another simple pattern with regclass operands. ----------------===// // CHECK-LABEL: if ([&]() { +// CHECK-NEXT: PredicateBitset ExpectedFeatures = {Feature_HasABit, Feature_HasBBit}; +// CHECK-NEXT: if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures) +// CHECK-NEXT: return false; // CHECK-NEXT: MachineInstr &MI0 = I; // CHECK-NEXT: if (MI0.getNumOperands() < 3) // CHECK-NEXT: return false; @@ -213,7 +247,8 @@ // CHECK-NEXT: }()) { return true; } def MUL : I<(outs GPR32:$dst), (ins GPR32:$src2, GPR32:$src1), - [(set GPR32:$dst, (mul GPR32:$src1, GPR32:$src2))]>; + [(set GPR32:$dst, (mul GPR32:$src1, GPR32:$src2))]>, + Requires<[HasA, HasB]>; //===- Test a pattern with ComplexPattern operands. 
-----------------------===// // Index: test/TableGen/intrinsic-long-name.td =================================================================== --- test/TableGen/intrinsic-long-name.td +++ test/TableGen/intrinsic-long-name.td @@ -22,7 +22,7 @@ list IntrProperties = []; } -def iAny : ValueType<0, 125>; +def iAny : ValueType<0, 253>; def llvm_anyint_ty : LLVMType; // Make sure we generate the long name without crashing Index: test/TableGen/intrinsic-varargs.td =================================================================== --- test/TableGen/intrinsic-varargs.td +++ test/TableGen/intrinsic-varargs.td @@ -23,7 +23,7 @@ } // isVoid needs to match the definition in ValueTypes.td -def isVoid : ValueType<0, 66>; // Produces no value +def isVoid : ValueType<0, 108>; // Produces no value def llvm_vararg_ty : LLVMType; // this means vararg here // CHECK: /* 0 */ 0, 29, 0, Index: test/Transforms/CodeExtractor/unreachable-block.ll =================================================================== --- /dev/null +++ test/Transforms/CodeExtractor/unreachable-block.ll @@ -0,0 +1,38 @@ +; RUN: opt -S -partial-inliner %s | FileCheck %s + +; CHECK-LABEL: define void @dipsy( +; CHECK-NEXT: call void @tinkywinky.1_ontrue() +; CHECK-NEXT: call void @patatuccio() +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; CHECK-LABEL: define internal void @tinkywinky.1_ontrue() { +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label %ontrue +; CHECK: .exitStub: +; CHECK-NEXT: ret void +; CHECK: ontrue: +; CHECK-NEXT: call void @patatino() +; CHECK-NEXT: br label %onfalse +; CHECK: onfalse: +; CHECK-NEXT: br label %.exitStub +; CHECK-NEXT: } + +declare void @patatino() +declare void @patatuccio() + +define fastcc void @tinkywinky() { + br i1 true, label %ontrue, label %onfalse +ontrue: + call void @patatino() + br label %onfalse +onfalse: + call void @patatuccio() + ret void +cantreachme: + ret void +} +define void @dipsy() { + call fastcc void @tinkywinky() + ret void +} Index: test/Transforms/ConstantHoisting/X86/ehpad.ll =================================================================== --- test/Transforms/ConstantHoisting/X86/ehpad.ll +++ test/Transforms/ConstantHoisting/X86/ehpad.ll @@ -1,4 +1,5 @@ ; RUN: opt -S -consthoist < %s | FileCheck %s +; RUN: opt -S -consthoist -consthoist-with-block-frequency=true < %s | FileCheck --check-prefix=BFIHOIST %s ; FIXME: The catchpad doesn't even use the constant, so a better fix would be to ; insert the bitcast in the catchpad block. 
@@ -11,6 +12,16 @@ ; CHECK-NEXT: bitcast i64 9209618997431186100 to i64 ; CHECK-NEXT: br i1 %tobool +; BFIHOIST-LABEL: define i32 @main +; BFIHOIST: then: +; BFIHOIST: %[[CONST1:.*]] = bitcast i64 9209618997431186100 to i64 +; BFIHOIST: %add = add i64 %call4, %[[CONST1]] +; BFIHOIST: br label %endif +; BFIHOIST: else: +; BFIHOIST: %[[CONST2:.*]] = bitcast i64 9209618997431186100 to i64 +; BFIHOIST: %add6 = add i64 %call5, %[[CONST2]] +; BFIHOIST: br label %endif + ; Function Attrs: norecurse define i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { %call = tail call i64 @fn(i64 0) Index: test/Transforms/InstCombine/add-sitofp.ll =================================================================== --- test/Transforms/InstCombine/add-sitofp.ll +++ test/Transforms/InstCombine/add-sitofp.ll @@ -15,3 +15,88 @@ %p = fadd double %o, 1.0 ret double %p } + +define double @test(i32 %a) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], 1 +; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double +; CHECK-NEXT: ret double [[RES]] +; + ; Drop two highest bits to guarantee that %a + 1 doesn't overflow + %a_and = and i32 %a, 1073741823 + %a_and_fp = sitofp i32 %a_and to double + %res = fadd double %a_and_fp, 1.0 + ret double %res +} + +define float @test_neg(i32 %a) { +; CHECK-LABEL: @test_neg( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float +; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], 1.000000e+00 +; CHECK-NEXT: ret float [[RES]] +; + ; Drop two highest bits to guarantee that %a + 1 doesn't overflow + %a_and = and i32 %a, 1073741823 + %a_and_fp = sitofp i32 %a_and to float + %res = fadd float %a_and_fp, 1.0 + ret float %res +} + +define double @test_2(i32 %a, i32 %b) { +; CHECK-LABEL: @test_2( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 +; CHECK-NEXT: [[ADDCONV:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]] +; CHECK-NEXT: [[RES:%.*]] = sitofp i32 [[ADDCONV]] to double +; CHECK-NEXT: ret double [[RES]] +; + ; Drop two highest bits to guarantee that %a + %b doesn't overflow + %a_and = and i32 %a, 1073741823 + %b_and = and i32 %b, 1073741823 + + %a_and_fp = sitofp i32 %a_and to double + %b_and_fp = sitofp i32 %b_and to double + + %res = fadd double %a_and_fp, %b_and_fp + ret double %res +} + +define float @test_2_neg(i32 %a, i32 %b) { +; CHECK-LABEL: @test_2_neg( +; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 +; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 +; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float +; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp i32 [[B_AND]] to float +; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], [[B_AND_FP]] +; CHECK-NEXT: ret float [[RES]] +; + ; Drop two highest bits to guarantee that %a + %b doesn't overflow + %a_and = and i32 %a, 1073741823 + %b_and = and i32 %b, 1073741823 + + %a_and_fp = sitofp i32 %a_and to float + %b_and_fp = sitofp i32 %b_and to float + + %res = fadd float %a_and_fp, %b_and_fp + ret float %res +} + +; This test demonstrates overly conservative legality check. The float addition +; can be replaced with the integer addition because the result of the operation +; can be represented in float, but we don't do that now. 
+define float @test_3(i32 %a, i32 %b) { +; CHECK-LABEL: @test_3( +; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 +; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] +; CHECK-NEXT: [[O:%.*]] = sitofp i32 [[N]] to float +; CHECK-NEXT: [[P:%.*]] = fadd float [[O]], 1.000000e+00 +; CHECK-NEXT: ret float [[P]] +; + %m = lshr i32 %a, 24 + %n = and i32 %m, %b + %o = sitofp i32 %n to float + %p = fadd float %o, 1.0 + ret float %p +} Index: test/Transforms/InstCombine/apint-shift.ll =================================================================== --- test/Transforms/InstCombine/apint-shift.ll +++ test/Transforms/InstCombine/apint-shift.ll @@ -287,13 +287,10 @@ ret i47 %sh2 } -; FIXME: Same as above with vectors. - define <2 x i47> @test12_splat_vec(<2 x i47> %X) { ; CHECK-LABEL: @test12_splat_vec( -; CHECK-NEXT: [[SH1:%.*]] = ashr <2 x i47> %X, -; CHECK-NEXT: [[SH2:%.*]] = shl nsw <2 x i47> [[SH1]], -; CHECK-NEXT: ret <2 x i47> [[SH2]] +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i47> %X, +; CHECK-NEXT: ret <2 x i47> [[TMP1]] ; %sh1 = ashr <2 x i47> %X, %sh2 = shl <2 x i47> %sh1, Index: test/Transforms/InstCombine/pr17827.ll =================================================================== --- test/Transforms/InstCombine/pr17827.ll +++ test/Transforms/InstCombine/pr17827.ll @@ -52,9 +52,7 @@ define <2 x i1> @test_shift_and_cmp_changed1_vec(<2 x i8> %p, <2 x i8> %q) { ; CHECK-LABEL: @test_shift_and_cmp_changed1_vec( ; CHECK-NEXT: [[ANDP:%.*]] = and <2 x i8> %p, -; CHECK-NEXT: [[ANDQ:%.*]] = and <2 x i8> %q, -; CHECK-NEXT: [[OR:%.*]] = or <2 x i8> [[ANDQ]], [[ANDP]] -; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i8> [[OR]], +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <2 x i8> [[ANDP]], ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> [[SHL]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; Index: test/Transforms/InstCombine/shift.ll =================================================================== --- test/Transforms/InstCombine/shift.ll +++ test/Transforms/InstCombine/shift.ll @@ -1049,12 +1049,11 @@ } ; (X << C1) >>u C2 --> X << (C1 - C2) & (-1 >> C2) -; FIXME: Demanded bits should change the mask constant as it does for the scalar case. 
define <2 x i8> @test53_no_nuw_splat_vec(<2 x i8> %x) { ; CHECK-LABEL: @test53_no_nuw_splat_vec( ; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> %x, -; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], +; CHECK-NEXT: [[B:%.*]] = and <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i8> [[B]] ; %A = shl <2 x i8> %x, @@ -1074,6 +1073,17 @@ ret i32 %and } +define <2 x i32> @test54_splat_vec(<2 x i32> %x) { +; CHECK-LABEL: @test54_splat_vec( +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %x, +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[AND]] +; + %shr2 = lshr <2 x i32> %x, + %shl = shl <2 x i32> %shr2, + %and = and <2 x i32> %shl, + ret <2 x i32> %and +} define i32 @test55(i32 %x) { ; CHECK-LABEL: @test55( @@ -1100,7 +1110,6 @@ ret i32 %or } - define i32 @test57(i32 %x) { ; CHECK-LABEL: @test57( ; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 %x, 1 @@ -1114,7 +1123,6 @@ ret i32 %or } - define i32 @test58(i32 %x) { ; CHECK-LABEL: @test58( ; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 %x, 3 @@ -1127,6 +1135,17 @@ ret i32 %or } +define <2 x i32> @test58_splat_vec(<2 x i32> %x) { +; CHECK-LABEL: @test58_splat_vec( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <2 x i32> %x, +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[TMP1]], +; CHECK-NEXT: ret <2 x i32> [[OR]] +; + %shr = ashr <2 x i32> %x, + %shl = shl <2 x i32> %shr, + %or = or <2 x i32> %shl, + ret <2 x i32> %or +} define i32 @test59(i32 %x) { ; CHECK-LABEL: @test59( @@ -1257,8 +1276,7 @@ define <2 x i64> @test_64_splat_vec(<2 x i32> %t) { ; CHECK-LABEL: @test_64_splat_vec( -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %t, -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw <2 x i32> [[AND]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> %t, ; CHECK-NEXT: [[SHL:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[SHL]] ; @@ -1268,3 +1286,23 @@ ret <2 x i64> %shl } +define <2 x i8> @ashr_demanded_bits_splat(<2 x i8> %x) { +; CHECK-LABEL: @ashr_demanded_bits_splat( +; CHECK-NEXT: [[SHR:%.*]] = ashr <2 x i8> %x, +; CHECK-NEXT: ret <2 x i8> [[SHR]] +; + %and = and <2 x i8> %x, + %shr = ashr <2 x i8> %and, + ret <2 x i8> %shr +} + +define <2 x i8> @lshr_demanded_bits_splat(<2 x i8> %x) { +; CHECK-LABEL: @lshr_demanded_bits_splat( +; CHECK-NEXT: [[SHR:%.*]] = lshr <2 x i8> %x, +; CHECK-NEXT: ret <2 x i8> [[SHR]] +; + %and = and <2 x i8> %x, + %shr = lshr <2 x i8> %and, + ret <2 x i8> %shr +} + Index: test/Transforms/InstCombine/vector-casts.ll =================================================================== --- test/Transforms/InstCombine/vector-casts.ll +++ test/Transforms/InstCombine/vector-casts.ll @@ -15,9 +15,9 @@ ; The ashr turns into an lshr. define <2 x i64> @test2(<2 x i64> %a) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, -; CHECK-NEXT: [[T:%.*]] = lshr <2 x i64> [[B]], -; CHECK-NEXT: ret <2 x i64> [[T]] +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> %a, +; CHECK-NEXT: [[TMP1:%.*]] = lshr exact <2 x i64> [[B]], +; CHECK-NEXT: ret <2 x i64> [[TMP1]] ; %b = and <2 x i64> %a, %t = ashr <2 x i64> %b, Index: test/Transforms/InstCombine/xor.ll =================================================================== --- test/Transforms/InstCombine/xor.ll +++ test/Transforms/InstCombine/xor.ll @@ -536,3 +536,20 @@ %xor = xor i32 %and, %B ret i32 %xor } + +; PR32706 - https://bugs.llvm.org/show_bug.cgi?id=32706 +; Pin an xor constant operand to -1 if possible because 'not' is better for SCEV and codegen. 
+ +define i32 @not_is_canonical(i32 %x, i32 %y) { +; CHECK-LABEL: @not_is_canonical( +; CHECK-NEXT: [[SUB:%.*]] = xor i32 %x, -1 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], %y +; CHECK-NEXT: [[MUL:%.*]] = shl i32 [[ADD]], 2 +; CHECK-NEXT: ret i32 [[MUL]] +; + %sub = xor i32 %x, 1073741823 + %add = add i32 %sub, %y + %mul = shl i32 %add, 2 + ret i32 %mul +} + Index: test/Transforms/InstCombine/zext.ll =================================================================== --- test/Transforms/InstCombine/zext.ll +++ test/Transforms/InstCombine/zext.ll @@ -35,7 +35,7 @@ define <2 x i64> @test4(<2 x i64> %A) { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> %A, +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> %A, ; CHECK-NEXT: [[XOR:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[XOR]] ; Index: test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll =================================================================== --- test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll +++ test/Transforms/LoopStrengthReduce/ARM/ivchain-ARM.ll @@ -198,7 +198,7 @@ ; @testNeon is an important example of the nead for ivchains. ; -; Currently we have three extra add.w's that keep the store address +; Currently we have two extra add.w's that keep the store address ; live past the next increment because ISEL is unfortunately undoing ; the store chain. ISEL also fails to convert all but one of the stores to ; post-increment addressing. However, the loads should use @@ -207,12 +207,10 @@ ; ; A9: testNeon: ; A9: %.lr.ph -; A9-NOT: lsl.w -; A9-NOT: {{ldr|str|adds|add r}} -; A9: vst1.8 {{.*}} [r{{[0-9]+}}]! -; A9-NOT: {{ldr|str|adds|add r}} ; A9: add.w r +; A9-NOT: lsl.w ; A9-NOT: {{ldr|str|adds|add r}} +; A9: vst1.8 {{.*}} [r{{[0-9]+}}], r{{[0-9]+}} ; A9: add.w r ; A9-NOT: {{ldr|str|adds|add r}} ; A9-NOT: add.w r Index: test/Transforms/LoopVectorize/phi-cost.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/phi-cost.ll @@ -0,0 +1,86 @@ +; REQUIRES: asserts +; RUN: opt < %s -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" + +; CHECK-LABEL: phi_two_incoming_values +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK: [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* {{.*}} +; CHECK: [[TMP5:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP6]], <2 x i32> [[WIDE_LOAD]] +; CHECK: store <2 x i32> [[PREDPHI]], <2 x i32>* {{.*}} +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; +define void @phi_two_incoming_values(i32* %a, i32* %b, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] + %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i + %tmp1 = load i32, i32* %tmp0, align 4 + %tmp2 = getelementptr inbounds i32, i32* %b, i64 %i + %tmp3 = icmp sgt i32 %tmp1, 0 + br i1 %tmp3, label %if.then, label %if.end + +if.then: + %tmp4 = add i32 %tmp1, 1 + br label 
%if.end + +if.end: + %tmp5 = phi i32 [ %tmp1, %for.body ], [ %tmp4, %if.then ] + store i32 %tmp5, i32* %tmp2, align 4 + %i.next = add i64 %i, 1 + %cond = icmp eq i64 %i, %n + br i1 %cond, label %for.end, label %for.body + +for.end: + ret void +} + +; CHECK-LABEL: phi_three_incoming_values +; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] +; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %tmp7, %if.else ] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ] +; CHECK: [[PREDPHI:%.*]] = select <2 x i1> {{.*}}, <2 x i32> , <2 x i32> +; CHECK: [[PREDPHI7:%.*]] = select <2 x i1> {{.*}}, <2 x i32> {{.*}}, <2 x i32> [[PREDPHI]] +; CHECK: store <2 x i32> [[PREDPHI7]], <2 x i32>* {{.*}} +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 +; +define void @phi_three_incoming_values(i32* %a, i32* %b, i64 %n) { +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %if.end ], [ 0, %entry ] + %tmp0 = getelementptr inbounds i32, i32* %a, i64 %i + %tmp1 = load i32, i32* %tmp0, align 4 + %tmp2 = getelementptr inbounds i32, i32* %b, i64 %i + %tmp3 = load i32, i32* %tmp2, align 4 + %tmp4 = icmp sgt i32 %tmp1, %tmp3 + br i1 %tmp4, label %if.then, label %if.end + +if.then: + %tmp5 = icmp sgt i32 %tmp1, 19 + br i1 %tmp5, label %if.end, label %if.else + +if.else: + %tmp6 = icmp slt i32 %tmp3, 4 + %tmp7 = select i1 %tmp6, i32 4, i32 5 + br label %if.end + +if.end: + %tmp8 = phi i32 [ 9, %for.body ], [ 3, %if.then ], [ %tmp7, %if.else ] + store i32 %tmp8, i32* %tmp0, align 4 + %i.next = add i64 %i, 1 + %cond = icmp eq i64 %i, %n + br i1 %cond, label %for.end, label %for.body + +for.end: + ret void +} Index: test/Transforms/SimplifyCFG/merge-cond-stores.ll =================================================================== --- test/Transforms/SimplifyCFG/merge-cond-stores.ll +++ test/Transforms/SimplifyCFG/merge-cond-stores.ll @@ -36,6 +36,39 @@ ret void } +; This is the same as test_simple, but the branch target order has been swapped +define void @test_simple_commuted(i32* %p, i32 %a, i32 %b) { +; CHECK-LABEL: @test_simple_commuted( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[X1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = or i1 [[X1]], [[X2]] +; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]] +; CHECK: [[DOT:%.*]] = zext i1 [[X2]] to i32 +; CHECK-NEXT: store i32 [[DOT]], i32* [[P:%.*]], align 4 +; CHECK-NEXT: br label [[TMP2]] +; CHECK: ret void +; +entry: + %x1 = icmp eq i32 %a, 0 + br i1 %x1, label %yes1, label %fallthrough + +yes1: + store i32 0, i32* %p + br label %fallthrough + +fallthrough: + %x2 = icmp eq i32 %b, 0 + br i1 %x2, label %yes2, label %end + +yes2: + store i32 1, i32* %p + br label %end + +end: + ret void +} + ; This test should entirely fold away, leaving one large basic block. 
define void @test_recursive(i32* %p, i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: @test_recursive( Index: test/tools/llvm-cvtres/basic.test =================================================================== --- /dev/null +++ test/tools/llvm-cvtres/basic.test @@ -0,0 +1,4 @@ +; RUN: llvm-cvtres /h > %t +; RUN: FileCheck -input-file=%t %s -check-prefix=HELP_TEST + +; HELP_TEST: OVERVIEW: Resource Converter Index: tools/LLVMBuild.txt =================================================================== --- tools/LLVMBuild.txt +++ tools/LLVMBuild.txt @@ -26,6 +26,7 @@ llvm-bcanalyzer llvm-cat llvm-cov + llvm-cvtres llvm-diff llvm-dis llvm-dwarfdump Index: tools/dsymutil/DwarfLinker.cpp =================================================================== --- tools/dsymutil/DwarfLinker.cpp +++ tools/dsymutil/DwarfLinker.cpp @@ -522,7 +522,8 @@ /// \brief Emit the abbreviation table \p Abbrevs to the /// debug_abbrev section. - void emitAbbrevs(const std::vector> &Abbrevs); + void emitAbbrevs(const std::vector> &Abbrevs, + unsigned DwarfVersion); /// \brief Emit the string table described by \p Pool. void emitStrings(const NonRelocatableStringpool &Pool); @@ -690,8 +691,10 @@ /// \brief Emit the \p Abbrevs array as the shared abbreviation table /// for the linked Dwarf file. void DwarfStreamer::emitAbbrevs( - const std::vector> &Abbrevs) { + const std::vector> &Abbrevs, + unsigned DwarfVersion) { MS->SwitchSection(MOFI->getDwarfAbbrevSection()); + MC->setDwarfVersion(DwarfVersion); Asm->emitDwarfAbbrevs(Abbrevs); } @@ -1129,6 +1132,12 @@ /// \brief Called at the end of a debug object link. void endDebugObject(); + /// Remembers the newest DWARF version we've seen in a unit. + void maybeUpdateMaxDwarfVersion(unsigned Version) { + if (MaxDwarfVersion < Version) + MaxDwarfVersion = Version; + } + /// Keeps track of relocations. class RelocationManager { struct ValidReloc { @@ -1430,6 +1439,7 @@ std::unique_ptr Streamer; uint64_t OutputDebugInfoSize; unsigned UnitID; ///< A unique ID that identifies each compile unit. + unsigned MaxDwarfVersion = 0; /// The units of the current debug map object. std::vector> Units; @@ -3435,9 +3445,11 @@ CUDie.dump(outs(), 0); } - if (!registerModuleReference(CUDie, *CU, ModuleMap)) + if (!registerModuleReference(CUDie, *CU, ModuleMap)) { Units.push_back(llvm::make_unique(*CU, UnitID++, !Options.NoODR, "")); + maybeUpdateMaxDwarfVersion(CU->getVersion()); + } } // Now build the DIE parent links that we will use during the next phase. @@ -3471,7 +3483,7 @@ // Emit everything that's global. 
if (!Options.NoOutput) { - Streamer->emitAbbrevs(Abbreviations); + Streamer->emitAbbrevs(Abbreviations, MaxDwarfVersion); Streamer->emitStrings(StringPool); } Index: tools/llvm-cvtres/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-cvtres/CMakeLists.txt @@ -0,0 +1,13 @@ +set(LLVM_LINK_COMPONENTS + Option + Support + ) + +set(LLVM_TARGET_DEFINITIONS Opts.td) + +tablegen(LLVM Opts.inc -gen-opt-parser-defs) +add_public_tablegen_target(CvtResTableGen) + +add_llvm_tool(llvm-cvtres + llvm-cvtres.cpp + ) Index: tools/llvm-cvtres/LLVMBuild.txt =================================================================== --- tools/llvm-cvtres/LLVMBuild.txt +++ tools/llvm-cvtres/LLVMBuild.txt @@ -1,4 +1,4 @@ -;===- ./tools/LLVMBuild.txt ------------------------------------*- Conf -*--===; +;===- ./tools/llvm-cvtres/LLVMBuild.txt ------------------------*- Conf -*--===; ; ; The LLVM Compiler Infrastructure ; @@ -15,39 +15,8 @@ ; ;===------------------------------------------------------------------------===; -[common] -subdirectories = - bugpoint - dsymutil - llc - lli - llvm-ar - llvm-as - llvm-bcanalyzer - llvm-cat - llvm-cov - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwp - llvm-extract - llvm-jitlistener - llvm-link - llvm-lto - llvm-mc - llvm-mcmarkup - llvm-modextract - llvm-nm - llvm-objdump - llvm-pdbdump - llvm-profdata - llvm-rtdyld - llvm-size - llvm-split - opt - verify-uselistorder - [component_0] -type = Group -name = Tools -parent = $ROOT +type = Tool +name = llvm-cvtres +parent = Tools +required_libraries = Option Support Index: tools/llvm-cvtres/Opts.td =================================================================== --- /dev/null +++ tools/llvm-cvtres/Opts.td @@ -0,0 +1,11 @@ +include "llvm/Option/OptParser.td" + +def DEFINE : Joined<["/"], "DEFINE:">, HelpText<"">, MetaVarName<"symbol">; +def FOLDDUPS : Flag<["/"], "FOLDDUPS:">, HelpText<"">; +def MACHINE : Joined<["/"], "MACHINE:">, HelpText<"">, MetaVarName<"{ARM|EBC|IA64|X64|X86}">; +def NOLOGO : Flag<["/"], "NOLOGO">, HelpText<"">; +def OUT : Joined<["/"], "OUT:">, HelpText<"">, MetaVarName<"filename">; +def READONLY : Flag<["/"], "READONLY">, HelpText<"">; +def VERBOSE : Flag<["/"], "VERBOSE">, HelpText<"">; +def HELP : Flag<["/"], "HELP">; +def H : Flag<["/"], "H">, Alias; Index: tools/llvm-cvtres/llvm-cvtres.h =================================================================== --- /dev/null +++ tools/llvm-cvtres/llvm-cvtres.h @@ -0,0 +1,13 @@ +//===- llvm-cvtres.h ------------------------------------------ *- C++ --*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVMCVTRES_LLVMCVTRES_H +#define LLVM_TOOLS_LLVMCVTRES_LLVMCVTRES_H + +#endif Index: tools/llvm-cvtres/llvm-cvtres.cpp =================================================================== --- /dev/null +++ tools/llvm-cvtres/llvm-cvtres.cpp @@ -0,0 +1,86 @@ +//===- llvm-cvtres.cpp - Serialize .res files into .obj ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Serialize .res files into .obj files. 
This is intended to be a +// platform-independent port of Microsoft's cvtres.exe. +// +//===----------------------------------------------------------------------===// + +#include "llvm-cvtres.h" + +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +enum ID { + OPT_INVALID = 0, // This is not an option ID. +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + OPT_##ID, +#include "Opts.inc" +#undef OPTION +}; + +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "Opts.inc" +#undef PREFIX + +static const opt::OptTable::Info InfoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { \ + PREFIX, NAME, HELPTEXT, \ + METAVAR, OPT_##ID, opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, \ + OPT_##ALIAS, ALIASARGS}, +#include "Opts.inc" +#undef OPTION +}; + +class CvtResOptTable : public opt::OptTable { +public: + CvtResOptTable() : OptTable(InfoTable, true) {} +}; + +static ExitOnError ExitOnErr; +} + +int main(int argc_, const char *argv_[]) { + sys::PrintStackTraceOnErrorSignal(argv_[0]); + PrettyStackTraceProgram X(argc_, argv_); + + ExitOnErr.setBanner("llvm-cvtres: "); + + SmallVector<const char *, 256> argv; + SpecificBumpPtrAllocator<char> ArgAllocator; + ExitOnErr(errorCodeToError(sys::Process::GetArgumentVector( + argv, makeArrayRef(argv_, argc_), ArgAllocator))); + + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+ + CvtResOptTable T; + unsigned MAI, MAC; + ArrayRef ArgsArr = makeArrayRef(argv_, argc_); + opt::InputArgList InputArgs = T.ParseArgs(ArgsArr, MAI, MAC); + + if (InputArgs.hasArg(OPT_HELP)) + T.PrintHelp(outs(), "cvtres", "Resource Converter", false); + + return 0; +} Index: unittests/ADT/APFloatTest.cpp =================================================================== --- unittests/ADT/APFloatTest.cpp +++ unittests/ADT/APFloatTest.cpp @@ -27,10 +27,11 @@ return F.convertToDouble(); } -static std::string convertToString(double d, unsigned Prec, unsigned Pad) { +static std::string convertToString(double d, unsigned Prec, unsigned Pad, + bool Tr = true) { llvm::SmallVector Buffer; llvm::APFloat F(d); - F.toString(Buffer, Prec, Pad); + F.toString(Buffer, Prec, Pad, Tr); return std::string(Buffer.data(), Buffer.size()); } @@ -949,6 +950,22 @@ ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1)); ASSERT_EQ("8.7318340000000001E+2", convertToString(873.1834, 0, 0)); ASSERT_EQ("1.7976931348623157E+308", convertToString(1.7976931348623157E+308, 0, 0)); + ASSERT_EQ("10", convertToString(10.0, 6, 3, false)); + ASSERT_EQ("1.000000e+01", convertToString(10.0, 6, 0, false)); + ASSERT_EQ("10100", convertToString(1.01E+4, 5, 2, false)); + ASSERT_EQ("1.0100e+04", convertToString(1.01E+4, 4, 2, false)); + ASSERT_EQ("1.01000e+04", convertToString(1.01E+4, 5, 1, false)); + ASSERT_EQ("0.0101", convertToString(1.01E-2, 5, 2, false)); + ASSERT_EQ("0.0101", convertToString(1.01E-2, 4, 2, false)); + ASSERT_EQ("1.01000e-02", convertToString(1.01E-2, 5, 1, false)); + ASSERT_EQ("0.78539816339744828", + convertToString(0.78539816339744830961, 0, 3, false)); + ASSERT_EQ("4.94065645841246540e-324", + convertToString(4.9406564584124654e-324, 0, 3, false)); + ASSERT_EQ("873.18340000000001", convertToString(873.1834, 0, 1, false)); + ASSERT_EQ("8.73183400000000010e+02", convertToString(873.1834, 0, 0, false)); + ASSERT_EQ("1.79769313486231570e+308", + convertToString(1.7976931348623157E+308, 0, 0, false)); } TEST(APFloatTest, toInteger) { Index: unittests/ADT/APIntTest.cpp =================================================================== --- unittests/ADT/APIntTest.cpp +++ unittests/ADT/APIntTest.cpp @@ -2057,4 +2057,33 @@ EXPECT_EQ(0, neg_one.shl(128)); } +TEST(APIntTest, isSubsetOf) { + APInt i32_1(32, 1); + APInt i32_2(32, 2); + APInt i32_3(32, 3); + EXPECT_FALSE(i32_3.isSubsetOf(i32_1)); + EXPECT_TRUE(i32_1.isSubsetOf(i32_3)); + EXPECT_FALSE(i32_2.isSubsetOf(i32_1)); + EXPECT_FALSE(i32_1.isSubsetOf(i32_2)); + EXPECT_TRUE(i32_3.isSubsetOf(i32_3)); + + APInt i128_1(128, 1); + APInt i128_2(128, 2); + APInt i128_3(128, 3); + EXPECT_FALSE(i128_3.isSubsetOf(i128_1)); + EXPECT_TRUE(i128_1.isSubsetOf(i128_3)); + EXPECT_FALSE(i128_2.isSubsetOf(i128_1)); + EXPECT_FALSE(i128_1.isSubsetOf(i128_2)); + EXPECT_TRUE(i128_3.isSubsetOf(i128_3)); + + i128_1 <<= 64; + i128_2 <<= 64; + i128_3 <<= 64; + EXPECT_FALSE(i128_3.isSubsetOf(i128_1)); + EXPECT_TRUE(i128_1.isSubsetOf(i128_3)); + EXPECT_FALSE(i128_2.isSubsetOf(i128_1)); + EXPECT_FALSE(i128_1.isSubsetOf(i128_2)); + EXPECT_TRUE(i128_3.isSubsetOf(i128_3)); +} + } // end anonymous namespace Index: unittests/ADT/BitVectorTest.cpp =================================================================== --- unittests/ADT/BitVectorTest.cpp +++ unittests/ADT/BitVectorTest.cpp @@ -186,7 +186,9 @@ // Test finding in an empty BitVector. 
TypeParam A; EXPECT_EQ(-1, A.find_first()); + EXPECT_EQ(-1, A.find_last()); EXPECT_EQ(-1, A.find_first_unset()); + EXPECT_EQ(-1, A.find_last_unset()); EXPECT_EQ(-1, A.find_next(0)); EXPECT_EQ(-1, A.find_next_unset(0)); @@ -196,12 +198,14 @@ A.set(13); A.set(75); + EXPECT_EQ(75, A.find_last()); EXPECT_EQ(12, A.find_first()); EXPECT_EQ(13, A.find_next(12)); EXPECT_EQ(75, A.find_next(13)); EXPECT_EQ(-1, A.find_next(75)); EXPECT_EQ(0, A.find_first_unset()); + EXPECT_EQ(99, A.find_last_unset()); EXPECT_EQ(14, A.find_next_unset(11)); EXPECT_EQ(14, A.find_next_unset(12)); EXPECT_EQ(14, A.find_next_unset(13)); @@ -213,12 +217,16 @@ A.set(0, 100); EXPECT_EQ(100U, A.count()); EXPECT_EQ(0, A.find_first()); + EXPECT_EQ(99, A.find_last()); EXPECT_EQ(-1, A.find_first_unset()); + EXPECT_EQ(-1, A.find_last_unset()); A.reset(0, 100); EXPECT_EQ(0U, A.count()); EXPECT_EQ(-1, A.find_first()); + EXPECT_EQ(-1, A.find_last()); EXPECT_EQ(0, A.find_first_unset()); + EXPECT_EQ(99, A.find_last_unset()); } TYPED_TEST(BitVectorTest, CompoundAssignment) { @@ -345,6 +353,128 @@ EXPECT_FALSE(B.anyCommon(A)); } +typedef std::vector> RangeList; + +template +static inline VecType createBitVector(uint32_t Size, + const RangeList &setRanges) { + VecType V; + V.resize(Size); + for (auto &R : setRanges) + V.set(R.first, R.second); + return V; +} + +TYPED_TEST(BitVectorTest, ShiftOpsSingleWord) { + // Test that shift ops work when the desired shift amount is less + // than one word. + + // 1. Case where the number of bits in the BitVector also fit into a single + // word. + TypeParam A = createBitVector(12, {{2, 4}, {8, 10}}); + TypeParam B = A; + + EXPECT_EQ(4U, A.count()); + EXPECT_TRUE(A.test(2)); + EXPECT_TRUE(A.test(3)); + EXPECT_TRUE(A.test(8)); + EXPECT_TRUE(A.test(9)); + + A >>= 1; + EXPECT_EQ(createBitVector(12, {{1, 3}, {7, 9}}), A); + + A <<= 1; + EXPECT_EQ(B, A); + + A >>= 10; + EXPECT_EQ(createBitVector(12, {}), A); + + A = B; + A <<= 10; + EXPECT_EQ(createBitVector(12, {}), A); + + // 2. Case where the number of bits in the BitVector do not fit into a single + // word. + + // 31----------------------------------------------------------------------0 + // XXXXXXXX XXXXXXXX XXXXXXXX 00000111 | 11111110 00000000 00001111 11111111 + A = createBitVector(40, {{0, 12}, {25, 35}}); + EXPECT_EQ(40U, A.size()); + EXPECT_EQ(22U, A.count()); + + // 2a. Make sure that left shifting some 1 bits out of the vector works. + // 31----------------------------------------------------------------------0 + // Before: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000111 | 11111110 00000000 00001111 11111111 + // After: + // XXXXXXXX XXXXXXXX XXXXXXXX 11111100 | 00000000 00011111 11111110 00000000 + A <<= 9; + EXPECT_EQ(createBitVector(40, {{9, 21}, {34, 40}}), A); + + // 2b. Make sure that keeping the number of one bits unchanged works. + // 31----------------------------------------------------------------------0 + // Before: + // XXXXXXXX XXXXXXXX XXXXXXXX 11111100 | 00000000 00011111 11111110 00000000 + // After: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000011 | 11110000 00000000 01111111 11111000 + A >>= 6; + EXPECT_EQ(createBitVector(40, {{3, 15}, {28, 34}}), A); + + // 2c. Make sure that right shifting some 1 bits out of the vector works. 
+ // 31----------------------------------------------------------------------0 + // Before: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000011 | 11110000 00000000 01111111 11111000 + // After: + // XXXXXXXX XXXXXXXX XXXXXXXX 00000000 | 00000000 11111100 00000000 00011111 + A >>= 10; + EXPECT_EQ(createBitVector(40, {{0, 5}, {18, 24}}), A); + + // 3. Big test. + A = createBitVector(300, {{1, 30}, {60, 95}, {200, 275}}); + A <<= 29; + EXPECT_EQ(createBitVector( + 300, {{1 + 29, 30 + 29}, {60 + 29, 95 + 29}, {200 + 29, 300}}), + A); +} + +TYPED_TEST(BitVectorTest, ShiftOpsMultiWord) { + // Test that shift ops work when the desired shift amount is greater than or + // equal to the size of a single word. + auto A = createBitVector(300, {{1, 30}, {60, 95}, {200, 275}}); + + // Make a copy so we can re-use it later. + auto B = A; + + // 1. Shift left by an exact multiple of the word size. This should invoke + // only a memmove and no per-word bit operations. + A <<= 64; + auto Expected = createBitVector( + 300, {{1 + 64, 30 + 64}, {60 + 64, 95 + 64}, {200 + 64, 300}}); + EXPECT_EQ(Expected, A); + + // 2. Shift left by a non multiple of the word size. This should invoke both + // a memmove and per-word bit operations. + A = B; + A <<= 93; + EXPECT_EQ(createBitVector( + 300, {{1 + 93, 30 + 93}, {60 + 93, 95 + 93}, {200 + 93, 300}}), + A); + + // 1. Shift right by an exact multiple of the word size. This should invoke + // only a memmove and no per-word bit operations. + A = B; + A >>= 64; + EXPECT_EQ( + createBitVector(300, {{0, 95 - 64}, {200 - 64, 275 - 64}}), A); + + // 2. Shift left by a non multiple of the word size. This should invoke both + // a memmove and per-word bit operations. + A = B; + A >>= 93; + EXPECT_EQ( + createBitVector(300, {{0, 95 - 93}, {200 - 93, 275 - 93}}), A); +} + TYPED_TEST(BitVectorTest, RangeOps) { TypeParam A; A.resize(256); Index: unittests/CodeGen/CMakeLists.txt =================================================================== --- unittests/CodeGen/CMakeLists.txt +++ unittests/CodeGen/CMakeLists.txt @@ -9,6 +9,7 @@ DIEHashTest.cpp LowLevelTypeTest.cpp MachineInstrBundleIteratorTest.cpp + ScalableVectorMVTsTest.cpp ) add_llvm_unittest(CodeGenTests Index: unittests/CodeGen/ScalableVectorMVTsTest.cpp =================================================================== --- /dev/null +++ unittests/CodeGen/ScalableVectorMVTsTest.cpp @@ -0,0 +1,88 @@ +//===-------- llvm/unittest/CodeGen/ScalableVectorMVTsTest.cpp ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(ScalableVectorMVTsTest, IntegerMVTs) { + for (auto VecTy : MVT::integer_scalable_vector_valuetypes()) { + ASSERT_TRUE(VecTy.isValid()); + ASSERT_TRUE(VecTy.isInteger()); + ASSERT_TRUE(VecTy.isVector()); + ASSERT_TRUE(VecTy.isScalableVector()); + ASSERT_TRUE(VecTy.getScalarType().isValid()); + + ASSERT_FALSE(VecTy.isFloatingPoint()); + } +} + +TEST(ScalableVectorMVTsTest, FloatMVTs) { + for (auto VecTy : MVT::fp_scalable_vector_valuetypes()) { + ASSERT_TRUE(VecTy.isValid()); + ASSERT_TRUE(VecTy.isFloatingPoint()); + ASSERT_TRUE(VecTy.isVector()); + ASSERT_TRUE(VecTy.isScalableVector()); + ASSERT_TRUE(VecTy.getScalarType().isValid()); + + ASSERT_FALSE(VecTy.isInteger()); + } +} + +TEST(ScalableVectorMVTsTest, HelperFuncs) { + LLVMContext Ctx; + + // Create with scalable flag + EVT Vnx4i32 = EVT::getVectorVT(Ctx, MVT::i32, 4, /*Scalable=*/true); + ASSERT_TRUE(Vnx4i32.isScalableVector()); + + // Create with separate MVT::ElementCount + auto EltCnt = MVT::ElementCount(2, true); + EVT Vnx2i32 = EVT::getVectorVT(Ctx, MVT::i32, EltCnt); + ASSERT_TRUE(Vnx2i32.isScalableVector()); + + // Create with inline MVT::ElementCount + EVT Vnx2i64 = EVT::getVectorVT(Ctx, MVT::i64, {2, true}); + ASSERT_TRUE(Vnx2i64.isScalableVector()); + + // Check that changing scalar types/element count works + EXPECT_EQ(Vnx2i32.widenIntegerVectorElementType(Ctx), Vnx2i64); + EXPECT_EQ(Vnx4i32.getHalfNumVectorElementsVT(Ctx), Vnx2i32); + + // Check that overloaded '*' and '/' operators work + EXPECT_EQ(EVT::getVectorVT(Ctx, MVT::i64, EltCnt * 2), MVT::nxv4i64); + EXPECT_EQ(EVT::getVectorVT(Ctx, MVT::i64, EltCnt / 2), MVT::nxv1i64); + + // Check that float->int conversion works + EVT Vnx2f64 = EVT::getVectorVT(Ctx, MVT::f64, {2, true}); + EXPECT_EQ(Vnx2f64.changeTypeToInteger(), Vnx2i64); + + // Check fields inside MVT::ElementCount + EltCnt = Vnx4i32.getVectorElementCount(); + EXPECT_EQ(EltCnt.Min, 4U); + ASSERT_TRUE(EltCnt.Scalable); + + // Check that fixed-length vector types aren't scalable. + EVT V8i32 = EVT::getVectorVT(Ctx, MVT::i32, 8); + ASSERT_FALSE(V8i32.isScalableVector()); + EVT V4f64 = EVT::getVectorVT(Ctx, MVT::f64, {4, false}); + ASSERT_FALSE(V4f64.isScalableVector()); + + // Check that MVT::ElementCount works for fixed-length types. 
+ EltCnt = V8i32.getVectorElementCount(); + EXPECT_EQ(EltCnt.Min, 8U); + ASSERT_FALSE(EltCnt.Scalable); +} + +} Index: unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp =================================================================== --- unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp +++ unittests/DebugInfo/DWARF/DWARFDebugInfoTest.cpp @@ -170,7 +170,8 @@ CUDie.addAttribute(Attr_DW_FORM_ref8, DW_FORM_ref8, Data8); const auto Attr_DW_FORM_ref_sig8 = static_cast(Attr++); - CUDie.addAttribute(Attr_DW_FORM_ref_sig8, DW_FORM_ref_sig8, Data8_2); + if (Version >= 4) + CUDie.addAttribute(Attr_DW_FORM_ref_sig8, DW_FORM_ref_sig8, Data8_2); const auto Attr_DW_FORM_ref_udata = static_cast(Attr++); CUDie.addAttribute(Attr_DW_FORM_ref_udata, DW_FORM_ref_udata, UData[0]); @@ -185,7 +186,8 @@ CUDie.addAttribute(Attr_DW_FORM_flag_false, DW_FORM_flag, false); const auto Attr_DW_FORM_flag_present = static_cast(Attr++); - CUDie.addAttribute(Attr_DW_FORM_flag_present, DW_FORM_flag_present); + if (Version >= 4) + CUDie.addAttribute(Attr_DW_FORM_flag_present, DW_FORM_flag_present); //---------------------------------------------------------------------- // Test SLEB128 based forms @@ -213,8 +215,9 @@ Dwarf32Values[0]); const auto Attr_DW_FORM_sec_offset = static_cast(Attr++); - CUDie.addAttribute(Attr_DW_FORM_sec_offset, DW_FORM_sec_offset, - Dwarf32Values[1]); + if (Version >= 4) + CUDie.addAttribute(Attr_DW_FORM_sec_offset, DW_FORM_sec_offset, + Dwarf32Values[1]); //---------------------------------------------------------------------- // Add an address at the end to make sure we can decode this value @@ -307,7 +310,8 @@ EXPECT_EQ(Data2, toReference(DieDG.find(Attr_DW_FORM_ref2), 0)); EXPECT_EQ(Data4, toReference(DieDG.find(Attr_DW_FORM_ref4), 0)); EXPECT_EQ(Data8, toReference(DieDG.find(Attr_DW_FORM_ref8), 0)); - EXPECT_EQ(Data8_2, toReference(DieDG.find(Attr_DW_FORM_ref_sig8), 0)); + if (Version >= 4) + EXPECT_EQ(Data8_2, toReference(DieDG.find(Attr_DW_FORM_ref_sig8), 0)); EXPECT_EQ(UData[0], toReference(DieDG.find(Attr_DW_FORM_ref_udata), 0)); //---------------------------------------------------------------------- @@ -315,7 +319,8 @@ //---------------------------------------------------------------------- EXPECT_EQ(1ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_true), 0)); EXPECT_EQ(0ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_false), 1)); - EXPECT_EQ(1ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_present), 0)); + if (Version >= 4) + EXPECT_EQ(1ULL, toUnsigned(DieDG.find(Attr_DW_FORM_flag_present), 0)); //---------------------------------------------------------------------- // Test SLEB128 based forms @@ -334,8 +339,9 @@ //---------------------------------------------------------------------- EXPECT_EQ(Dwarf32Values[0], toReference(DieDG.find(Attr_DW_FORM_GNU_ref_alt), 0)); - EXPECT_EQ(Dwarf32Values[1], - toSectionOffset(DieDG.find(Attr_DW_FORM_sec_offset), 0)); + if (Version >= 4) + EXPECT_EQ(Dwarf32Values[1], + toSectionOffset(DieDG.find(Attr_DW_FORM_sec_offset), 0)); //---------------------------------------------------------------------- // Add an address at the end to make sure we can decode this value Index: unittests/Support/BranchProbabilityTest.cpp =================================================================== --- unittests/Support/BranchProbabilityTest.cpp +++ unittests/Support/BranchProbabilityTest.cpp @@ -115,6 +115,54 @@ EXPECT_FALSE(BigZero >= BigOne); } +TEST(BranchProbabilityTest, ArithmeticOperators) { + BP Z(0, 1); + BP O(1, 1); + BP H(1, 2); + BP Q(1, 4); + BP Q3(3, 4); + + 
EXPECT_EQ(Z + O, O); + EXPECT_EQ(H + Z, H); + EXPECT_EQ(H + H, O); + EXPECT_EQ(Q + H, Q3); + EXPECT_EQ(Q + Q3, O); + EXPECT_EQ(H + Q3, O); + EXPECT_EQ(Q3 + Q3, O); + + EXPECT_EQ(Z - O, Z); + EXPECT_EQ(O - Z, O); + EXPECT_EQ(O - H, H); + EXPECT_EQ(O - Q, Q3); + EXPECT_EQ(Q3 - H, Q); + EXPECT_EQ(Q - H, Z); + EXPECT_EQ(Q - Q3, Z); + + EXPECT_EQ(Z * O, Z); + EXPECT_EQ(H * H, Q); + EXPECT_EQ(Q * O, Q); + EXPECT_EQ(O * O, O); + EXPECT_EQ(Z * Z, Z); + + EXPECT_EQ(Z * 3, Z); + EXPECT_EQ(Q * 3, Q3); + EXPECT_EQ(H * 3, O); + EXPECT_EQ(Q3 * 2, O); + EXPECT_EQ(O * UINT32_MAX, O); + + EXPECT_EQ(Z / 4, Z); + EXPECT_EQ(O / 4, Q); + EXPECT_EQ(Q3 / 3, Q); + EXPECT_EQ(H / 2, Q); + EXPECT_EQ(O / 2, H); + EXPECT_EQ(H / UINT32_MAX, Z); + + BP Min(1, 1u << 31); + + EXPECT_EQ(O / UINT32_MAX, Z); + EXPECT_EQ(Min * UINT32_MAX, O); +} + TEST(BranchProbabilityTest, getCompl) { EXPECT_EQ(BP(5, 7), BP(2, 7).getCompl()); EXPECT_EQ(BP(2, 7), BP(5, 7).getCompl()); Index: utils/TableGen/AsmMatcherEmitter.cpp =================================================================== --- utils/TableGen/AsmMatcherEmitter.cpp +++ utils/TableGen/AsmMatcherEmitter.cpp @@ -2861,7 +2861,7 @@ emitValidateOperandClass(Info, OS); // Emit the available features compute function. - SubtargetFeatureInfo::emitComputeAvailableFeatures( + SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( Info.Target.getName(), ClassName, "ComputeAvailableFeatures", Info.SubtargetFeatures, OS); Index: utils/TableGen/CodeEmitterGen.cpp =================================================================== --- utils/TableGen/CodeEmitterGen.cpp +++ utils/TableGen/CodeEmitterGen.cpp @@ -336,7 +336,7 @@ o << "#endif // NDEBUG\n"; // Emit the available features compute function. - SubtargetFeatureInfo::emitComputeAvailableFeatures( + SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( Target.getName(), "MCCodeEmitter", "computeAvailableFeatures", SubtargetFeatures, o); Index: utils/TableGen/CodeGenTarget.cpp =================================================================== --- utils/TableGen/CodeGenTarget.cpp +++ utils/TableGen/CodeGenTarget.cpp @@ -126,6 +126,45 @@ case MVT::v2f64: return "MVT::v2f64"; case MVT::v4f64: return "MVT::v4f64"; case MVT::v8f64: return "MVT::v8f64"; + case MVT::nxv2i1: return "MVT::nxv2i1"; + case MVT::nxv4i1: return "MVT::nxv4i1"; + case MVT::nxv8i1: return "MVT::nxv8i1"; + case MVT::nxv16i1: return "MVT::nxv16i1"; + case MVT::nxv32i1: return "MVT::nxv32i1"; + case MVT::nxv1i8: return "MVT::nxv1i8"; + case MVT::nxv2i8: return "MVT::nxv2i8"; + case MVT::nxv4i8: return "MVT::nxv4i8"; + case MVT::nxv8i8: return "MVT::nxv8i8"; + case MVT::nxv16i8: return "MVT::nxv16i8"; + case MVT::nxv32i8: return "MVT::nxv32i8"; + case MVT::nxv1i16: return "MVT::nxv1i16"; + case MVT::nxv2i16: return "MVT::nxv2i16"; + case MVT::nxv4i16: return "MVT::nxv4i16"; + case MVT::nxv8i16: return "MVT::nxv8i16"; + case MVT::nxv16i16: return "MVT::nxv16i16"; + case MVT::nxv32i16: return "MVT::nxv32i16"; + case MVT::nxv1i32: return "MVT::nxv1i32"; + case MVT::nxv2i32: return "MVT::nxv2i32"; + case MVT::nxv4i32: return "MVT::nxv4i32"; + case MVT::nxv8i32: return "MVT::nxv8i32"; + case MVT::nxv16i32: return "MVT::nxv16i32"; + case MVT::nxv1i64: return "MVT::nxv1i64"; + case MVT::nxv2i64: return "MVT::nxv2i64"; + case MVT::nxv4i64: return "MVT::nxv4i64"; + case MVT::nxv8i64: return "MVT::nxv8i64"; + case MVT::nxv16i64: return "MVT::nxv16i64"; + case MVT::nxv2f16: return "MVT::nxv2f16"; + case MVT::nxv4f16: return "MVT::nxv4f16"; + case MVT::nxv8f16: return 
"MVT::nxv8f16"; + case MVT::nxv1f32: return "MVT::nxv1f32"; + case MVT::nxv2f32: return "MVT::nxv2f32"; + case MVT::nxv4f32: return "MVT::nxv4f32"; + case MVT::nxv8f32: return "MVT::nxv8f32"; + case MVT::nxv16f32: return "MVT::nxv16f32"; + case MVT::nxv1f64: return "MVT::nxv1f64"; + case MVT::nxv2f64: return "MVT::nxv2f64"; + case MVT::nxv4f64: return "MVT::nxv4f64"; + case MVT::nxv8f64: return "MVT::nxv8f64"; case MVT::token: return "MVT::token"; case MVT::Metadata: return "MVT::Metadata"; case MVT::iPTR: return "MVT::iPTR"; Index: utils/TableGen/GlobalISelEmitter.cpp =================================================================== --- utils/TableGen/GlobalISelEmitter.cpp +++ utils/TableGen/GlobalISelEmitter.cpp @@ -31,6 +31,7 @@ //===----------------------------------------------------------------------===// #include "CodeGenDAGPatterns.h" +#include "SubtargetFeatureInfo.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -161,20 +162,6 @@ return Explanation; } -static std::string explainRulePredicates(const ArrayRef Predicates) { - std::string Explanation = ""; - StringRef Separator = ""; - for (const auto *P : Predicates) { - Explanation += Separator; - - if (const DefInit *PDef = dyn_cast(P)) { - Explanation += PDef->getDef()->getName(); - } else - Explanation += ""; - } - return Explanation; -} - std::string explainOperator(Record *Operator) { if (Operator->isSubClassOf("SDNode")) return " (" + Operator->getValueAsString("Opcode") + ")"; @@ -238,6 +225,8 @@ /// ID for the next instruction variable defined with defineInsnVar() unsigned NextInsnVarID; + std::vector RequiredFeatures; + public: RuleMatcher() : Matchers(), Actions(), InsnVariableNames(), NextInsnVarID(0) {} @@ -245,6 +234,7 @@ RuleMatcher &operator=(RuleMatcher &&Other) = default; InstructionMatcher &addInstructionMatcher(); + void addRequiredFeature(Record *Feature); template Kind &addAction(Args &&... args); @@ -255,7 +245,9 @@ void emitCxxCapturedInsnList(raw_ostream &OS); void emitCxxCaptureStmts(raw_ostream &OS, StringRef Expr); - void emit(raw_ostream &OS); + void emit(raw_ostream &OS, + std::map + SubtargetFeatures); /// Compare the priority of this object and B. /// @@ -1092,6 +1084,10 @@ return *Matchers.back(); } +void RuleMatcher::addRequiredFeature(Record *Feature) { + RequiredFeatures.push_back(Feature); +} + template Kind &RuleMatcher::addAction(Args &&... args) { Actions.emplace_back(llvm::make_unique(std::forward(args)...)); @@ -1135,7 +1131,9 @@ Matchers.front()->emitCxxCaptureStmts(OS, *this, InsnVarName); } -void RuleMatcher::emit(raw_ostream &OS) { +void RuleMatcher::emit(raw_ostream &OS, + std::map + SubtargetFeatures) { if (Matchers.empty()) llvm_unreachable("Unexpected empty matcher!"); @@ -1149,7 +1147,22 @@ // %elt0(s32), %elt1(s32) = TGT_LOAD_PAIR %ptr // on some targets but we don't need to make use of that yet. 
assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet"); - OS << "if ([&]() {\n"; + + OS << "if ("; + OS << "[&]() {\n"; + if (!RequiredFeatures.empty()) { + OS << " PredicateBitset ExpectedFeatures = {"; + StringRef Separator = ""; + for (const auto &Predicate : RequiredFeatures) { + const auto &I = SubtargetFeatures.find(Predicate); + assert(I != SubtargetFeatures.end() && "Didn't import predicate?"); + OS << Separator << I->second.getEnumBitName(); + Separator = ", "; + } + OS << "};\n"; + OS << "if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures)\n" + << " return false;\n"; + } emitCxxCaptureStmts(OS, "I"); @@ -1264,10 +1277,13 @@ /// GIComplexPatternEquiv. DenseMap ComplexPatternEquivs; + // Map of predicates to their subtarget features. + std::map SubtargetFeatures; + void gatherNodeEquivs(); const CodeGenInstruction *findNodeEquiv(Record *N) const; - Error importRulePredicates(RuleMatcher &M, ArrayRef Predicates) const; + Error importRulePredicates(RuleMatcher &M, ArrayRef Predicates); Expected createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, const TreePatternNode *Src) const; @@ -1287,6 +1303,8 @@ /// Analyze pattern \p P, returning a matcher for it if possible. /// Otherwise, return an Error explaining why we don't support it. Expected runOnPattern(const PatternToMatch &P); + + void declareSubtargetFeature(Record *Predicate); }; void GlobalISelEmitter::gatherNodeEquivs() { @@ -1315,10 +1333,13 @@ Error GlobalISelEmitter::importRulePredicates(RuleMatcher &M, - ArrayRef Predicates) const { - if (!Predicates.empty()) - return failedImport("Pattern has a rule predicate (" + - explainRulePredicates(Predicates) + ")"); + ArrayRef Predicates) { + for (const Init *Predicate : Predicates) { + const DefInit *PredicateDef = static_cast(Predicate); + declareSubtargetFeature(PredicateDef->getDef()); + M.addRequiredFeature(PredicateDef->getDef()); + } + return Error::success(); } @@ -1725,6 +1746,13 @@ for (const auto &Rule : Rules) MaxTemporaries = std::max(MaxTemporaries, Rule.countTemporaryOperands()); + OS << "#ifdef GET_GLOBALISEL_PREDICATE_BITSET\n" + << "const unsigned MAX_SUBTARGET_PREDICATES = " << SubtargetFeatures.size() + << ";\n" + << "using PredicateBitset = " + "llvm::PredicateBitsetImpl;\n" + << "#endif // ifdef GET_GLOBALISEL_PREDICATE_BITSET\n\n"; + OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n"; for (unsigned I = 0; I < MaxTemporaries; ++I) OS << " mutable MachineOperand TempOp" << I << ";\n"; @@ -1735,14 +1763,21 @@ OS << ", TempOp" << I << "(MachineOperand::CreatePlaceholder())\n"; OS << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n"; - OS << "#ifdef GET_GLOBALISEL_IMPL\n" - << "bool " << Target.getName() + OS << "#ifdef GET_GLOBALISEL_IMPL\n"; + SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures, + OS); + SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, OS); + SubtargetFeatureInfo::emitComputeAvailableFeatures( + Target.getName(), "InstructionSelector", "computeAvailableFeatures", + SubtargetFeatures, OS); + + OS << "bool " << Target.getName() << "InstructionSelector::selectImpl(MachineInstr &I) const {\n" << " MachineFunction &MF = *I.getParent()->getParent();\n" << " const MachineRegisterInfo &MRI = MF.getRegInfo();\n"; for (auto &Rule : Rules) { - Rule.emit(OS); + Rule.emit(OS, SubtargetFeatures); ++NumPatternEmitted; } @@ -1751,6 +1786,12 @@ << "#endif // ifdef GET_GLOBALISEL_IMPL\n"; } +void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) { + if 
(SubtargetFeatures.count(Predicate) == 0) + SubtargetFeatures.emplace( + Predicate, SubtargetFeatureInfo(Predicate, SubtargetFeatures.size())); +} + } // end anonymous namespace //===----------------------------------------------------------------------===// Index: utils/TableGen/SubtargetFeatureInfo.h =================================================================== --- utils/TableGen/SubtargetFeatureInfo.h +++ utils/TableGen/SubtargetFeatureInfo.h @@ -37,16 +37,34 @@ return "Feature_" + TheDef->getName().str(); } + /// \brief The name of the enumerated constant identifying the bitnumber for + /// this feature. + std::string getEnumBitName() const { + return "Feature_" + TheDef->getName().str() + "Bit"; + } + void dump() const; static std::vector> getAll(const RecordKeeper &Records); /// Emit the subtarget feature flag definitions. + /// + /// This version emits the bit value for the feature and is therefore limited + /// to 64 feature bits. static void emitSubtargetFeatureFlagEnumeration( std::map &SubtargetFeatures, raw_ostream &OS); + /// Emit the subtarget feature flag definitions. + /// + /// This version emits the bit index for the feature and can therefore support + /// more than 64 feature bits. + static void emitSubtargetFeatureBitEnumeration( + std::map + &SubtargetFeatures, + raw_ostream &OS); + static void emitNameTable(std::map &SubtargetFeatures, raw_ostream &OS); @@ -54,6 +72,9 @@ /// Emit the function to compute the list of available features given a /// subtarget. /// + /// This version is used for subtarget features defined using Predicate<> + /// and supports more than 64 feature bits. + /// /// \param TargetName The name of the target as used in class prefixes (e.g. /// Subtarget) /// \param ClassName The name of the class (without the prefix) @@ -66,6 +87,25 @@ std::map &SubtargetFeatures, raw_ostream &OS); + + /// Emit the function to compute the list of available features given a + /// subtarget. + /// + /// This version is used for subtarget features defined using + /// AssemblerPredicate<> and supports up to 64 feature bits. + /// + /// \param TargetName The name of the target as used in class prefixes (e.g. + /// Subtarget) + /// \param ClassName The name of the class (without the prefix) + /// that will contain the generated functions. + /// \param FuncName The name of the function to emit. + /// \param SubtargetFeatures A map of TableGen records to the + /// SubtargetFeatureInfo equivalent. 
+ static void emitComputeAssemblerAvailableFeatures( + StringRef TargetName, StringRef ClassName, StringRef FuncName, + std::map + &SubtargetFeatures, + raw_ostream &OS); }; } // end namespace llvm Index: utils/TableGen/SubtargetFeatureInfo.cpp =================================================================== --- utils/TableGen/SubtargetFeatureInfo.cpp +++ utils/TableGen/SubtargetFeatureInfo.cpp @@ -59,6 +59,20 @@ OS << "};\n\n"; } +void SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration( + std::map &SubtargetFeatures, + raw_ostream &OS) { + OS << "// Bits for subtarget features that participate in " + << "instruction matching.\n"; + OS << "enum SubtargetFeatureBits : " + << getMinimalTypeForRange(SubtargetFeatures.size()) << " {\n"; + for (const auto &SF : SubtargetFeatures) { + const SubtargetFeatureInfo &SFI = SF.second; + OS << " " << SFI.getEnumBitName() << " = " << SFI.Index << ",\n"; + } + OS << "};\n\n"; +} + void SubtargetFeatureInfo::emitNameTable( std::map &SubtargetFeatures, raw_ostream &OS) { @@ -90,6 +104,24 @@ StringRef TargetName, StringRef ClassName, StringRef FuncName, std::map &SubtargetFeatures, raw_ostream &OS) { + OS << "PredicateBitset " << TargetName << ClassName << "::\n" + << FuncName << "(const MachineFunction *MF, const " << TargetName + << "Subtarget *Subtarget) const {\n"; + OS << " PredicateBitset Features;\n"; + for (const auto &SF : SubtargetFeatures) { + const SubtargetFeatureInfo &SFI = SF.second; + + OS << " if (" << SFI.TheDef->getValueAsString("CondString") << ")\n"; + OS << " Features[" << SFI.getEnumBitName() << "] = 1;\n"; + } + OS << " return Features;\n"; + OS << "}\n\n"; +} + +void SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures( + StringRef TargetName, StringRef ClassName, StringRef FuncName, + std::map &SubtargetFeatures, + raw_ostream &OS) { OS << "uint64_t " << TargetName << ClassName << "::\n" << FuncName << "(const FeatureBitset& FB) const {\n"; OS << " uint64_t Features = 0;\n"; Index: utils/TableGen/Types.cpp =================================================================== --- utils/TableGen/Types.cpp +++ utils/TableGen/Types.cpp @@ -40,5 +40,6 @@ uint64_t MaxIndex = Size; if (MaxIndex > 0) MaxIndex--; + assert(MaxIndex <= 64 && "Too many bits"); return getMinimalTypeForRange(1ULL << MaxIndex); }
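For reference, below is a minimal sketch of the selector code the updated GlobalISelEmitter is expected to produce for a rule guarded by Requires<[HasA, HasB]>. It is assembled from the emitter changes and the CHECK lines in test/TableGen/GlobalISelEmitter.td above; the MyTarget names are the test's placeholder target rather than a real backend, and the surrounding GET_GLOBALISEL_* boilerplate is abridged.

// One bit per Predicate<> record that participates in instruction selection
// (emitted by emitSubtargetFeatureBitEnumeration).
enum SubtargetFeatureBits : uint8_t {
  Feature_HasABit = 0,
  Feature_HasBBit = 1,
};

// Computed once from the subtarget (emitted by emitComputeAvailableFeatures).
PredicateBitset MyTargetInstructionSelector::computeAvailableFeatures(
    const MachineFunction *MF, const MyTargetSubtarget *Subtarget) const {
  PredicateBitset Features;
  if (Subtarget->hasA())
    Features[Feature_HasABit] = 1;
  if (Subtarget->hasB())
    Features[Feature_HasBBit] = 1;
  return Features;
}

// Inside selectImpl(), each imported rule now begins with a feature check
// (emitted by RuleMatcher::emit) before the usual opcode and operand tests:
if ([&]() {
      PredicateBitset ExpectedFeatures = {Feature_HasABit, Feature_HasBBit};
      if ((AvailableFeatures & ExpectedFeatures) != ExpectedFeatures)
        return false;
      // ... operand, opcode and register-class checks for the MUL pattern ...
      return true;
    }()) { return true; }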