Index: lib/Target/X86/CMakeLists.txt =================================================================== --- lib/Target/X86/CMakeLists.txt +++ lib/Target/X86/CMakeLists.txt @@ -22,6 +22,7 @@ X86InstrInfo.cpp X86MCInstLower.cpp X86MachineFunctionInfo.cpp + X86MaterializeImmediates.cpp X86PadShortFunction.cpp X86RegisterInfo.cpp X86SelectionDAGInfo.cpp Index: lib/Target/X86/X86.h =================================================================== --- lib/Target/X86/X86.h +++ lib/Target/X86/X86.h @@ -67,6 +67,10 @@ /// to eliminate execution delays in some Atom processors. FunctionPass *createX86FixupLEAs(); +/// \brief Creates a pass that moves constants used as immediates into +/// registers, if this reduces code size. +FunctionPass *createX86MaterializeImmediates(); + } // End llvm namespace #endif Index: lib/Target/X86/X86MaterializeImmediates.cpp =================================================================== --- /dev/null +++ lib/Target/X86/X86MaterializeImmediates.cpp @@ -0,0 +1,1052 @@ +//===------- X86MaterializeImmediates.cpp - move immediate to register ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass which will move immediate operand into a register +// if it is used in several adjacent instructions. This transformation tries to +// reduce code size by using shorter instructions. +// +// +// Decision, whether the instruction with immediate value should be replaced by +// its register form, is made by analysis of the immediate usage history. The +// pass scans instructions in basic blocks, when it finds an instruction that +// uses immediate value and has equivalent form with register, it records info +// about it into the history. Sequential number of the instruction in basic +// block plays a role of time. 
+// The history is organized as a matrix, one
+// dimension is "time", the other corresponds to the value of immediate
+// operand. The history is limited in both dimensions, if there is no room for
+// new immediate use, the oldest one is dropped.
+//
+// Each immediate value in the history is thus represented by series of its
+// uses. Based on this set the profit of moving the value to register is
+// calculated. If the size gain is enough, the series of the instructions may be
+// replaced by load instruction and series of instructions using the register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "x86-imm2reg"
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include <algorithm> // std::sort, std::lower_bound
+#include <cstring>   // memmove
+using namespace llvm;
+
+// Maximal number of uses kept for an immediate value in history. Actual number
+// may be set smaller by supplying proper command line option. This is also the
+// compile-time capacity of the per-value use array.
+const unsigned MaxHistoryDepth = 8;
+
+// Maximal number of immediate values that history can track. Actual number
+// may be set smaller by supplying proper command line option. This is also the
+// compile-time capacity of the value array in the history.
+const unsigned MaxHistoryWidth = 4;
+
+// Default maximal number of instructions allowed after the last use of an
+// immediate value before it is dropped from history.
+const unsigned DefMaxSeparatingInstrs = MaxHistoryWidth;
+
+// Default maximal number of registers that may be used for caching immediate
+// values.
+const unsigned DefMaxRegisters = 2;
+
+// Default minimal gain in bytes, necessary for moving immediate value to a
+// register.
+const unsigned DefMinProfit = 1;
+
+// NOTE(review): the template argument of every cl::opt below was missing in
+// the text under review (angle brackets stripped); <unsigned> is restored
+// because all defaults are 'const unsigned' - confirm against the original
+// patch.
+
+// Max number of immediate values tracked simultaneously.
+static cl::opt<unsigned> MaxImmediates(
+    "x86-i2r-histwidth",
+    cl::desc("Max number of immediate values tracked in history."),
+    cl::init(MaxHistoryWidth), cl::Hidden);
+
+// Max number of usages tracked for particular immediate value which is not
+// moved into a register yet.
+static cl::opt<unsigned> HistoryDepth(
+    "x86-i2r-histdepth",
+    cl::desc("Max number of uses stored for a tracked immediate value."),
+    cl::init(MaxHistoryDepth), cl::Hidden);
+
+// Maximal number of instructions that may separate consecutive uses of the
+// same immediate value. If the last instruction that uses particular immediate
+// value is followed by more instructions that do not, this value is removed
+// from tracking, register is freed.
+static cl::opt<unsigned> MaxSeparatingInstrs(
+    "x86-i2r-maxsep",
+    cl::desc("Max number of instructions separating two immediate uses."),
+    cl::init(DefMaxSeparatingInstrs), cl::Hidden);
+
+// Max number of values that may be scheduled for, or kept in, registers at
+// the same time.
+static cl::opt<unsigned>
+    MaxRegisters("x86-i2r-regs",
+                 cl::desc("Max number of registers used for materialization."),
+                 cl::init(DefMaxRegisters), cl::Hidden);
+
+// Smallest size gain (bytes) for which a value is considered worth moving to
+// a register.
+static cl::opt<unsigned> MinProfit(
+    "x86-i2r-profit",
+    cl::desc("Minimal number of bytes that materialization must save."),
+    cl::init(DefMinProfit), cl::Hidden);
+
+STATISTIC(NumMatImmediates, "Number of materialized constants");
+STATISTIC(NumMatInstructions, "Number of instructions with materialized imms");
+STATISTIC(NumMatBytes, "Number of bytes saved by materialization");
+STATISTIC(NumMatDropped, "Number of value uses dropped from history");
+
+namespace {
+
+typedef MachineBasicBlock::iterator InstrIterator;
+
+// Enumerates possible size of immediate operand.
+enum ImmediateSize {
+  // Enumerator N denotes an immediate of (1 << N) bytes; SizeInBytes() relies
+  // on this numbering.
+  DATA_8,
+  DATA_16,
+  DATA_32,
+  DATA_64,
+  DATA_Total
+};
+
+// Converts an immediate size code into the number of bytes it stands for.
+static unsigned SizeInBytes(ImmediateSize Sz) {
+  assert(Sz < DATA_Total);
+  return 1U << Sz;
+}
+
+// Static description of one instruction that takes an immediate operand.
+//
+// Every field is a function of the opcode alone, so records of this type can
+// live in a fixed table.
+struct InstrInfo {
+  int Opcode;         // Opcode of the immediate form
+  int NewOpCode;      // Opcode to use once the immediate sits in a register
+  ImmediateSize Size; // Width of the immediate data
+  int OperandNo;      // Index of the immediate operand
+  int Profit;         // Bytes saved by switching imm -> reg
+  bool IsLoad;        // True for instructions like 'mov imm, reg'
+};
+
+// Lookup interface over the table of instructions that have an immediate
+// operand and an equivalent register form.
+class OpCodeTable {
+  static InstrInfo Table[];
+  static unsigned TableSize;
+  static bool Sorted;
+
+  // Orders two table rows by their immediate-form opcode.
+  static bool OpCodeCmp(const InstrInfo &Lhs, const InstrInfo &Rhs) {
+    return Lhs.Opcode < Rhs.Opcode;
+  }
+  // Row-versus-key comparison in the shape std::lower_bound expects.
+  static bool OpCodeFind(const InstrInfo &Row, int OpCode) {
+    return Row.Opcode < OpCode;
+  }
+
+public:
+  static InstrInfo *begin() { return Table; }
+  static InstrInfo *end() { return Table + TableSize; }
+
+  // Sorts the table by opcode on the first call; later calls do nothing.
+  // Find() performs a binary search and therefore needs the sorted order.
+  static void Sort() {
+    if (Sorted)
+      return;
+    std::sort(begin(), end(), OpCodeCmp);
+    Sorted = true;
+  }
+
+  // Returns the row describing OpCode, or null if the opcode is not listed.
+  static InstrInfo *Find(int OpCode) {
+    InstrInfo *const Last = end();
+    InstrInfo *Pos = std::lower_bound(begin(), Last, OpCode, OpCodeFind);
+    return (Pos != Last && Pos->Opcode == OpCode) ? Pos : nullptr;
+  }
+};
+
+// Database of instructions with immediate operand.
+//
+// Columns: immediate-form opcode, register-form opcode, immediate width,
+// index of the immediate operand, bytes saved by the register form, and (for
+// the first four rows only) the IsLoad flag. Rows that omit the last field
+// get IsLoad == false from aggregate initialization. The rows need not be
+// ordered here; OpCodeTable::Sort() orders them before the first lookup.
+InstrInfo OpCodeTable::Table[] = {
+    {X86::MOV16ri, X86::MOV16rr, DATA_16, 1, 1, true},   // 66 B8 iw -> 66 89 /r
+    {X86::MOV32ri, X86::MOV32rr, DATA_32, 1, 3, true},   // B8 id -> 89 /r
+    {X86::MOV64ri, X86::MOV64rr, DATA_64, 1, 7, true},   // RX B8 io -> RX 89 /r
+    {X86::MOV64ri32, X86::MOV64rr, DATA_64, 1, 4, true}, // RX C7 /0 id -> RX 89 /r
+    {X86::MOV8mi, X86::MOV8mr, DATA_8, 5, 1},      // C6 /0 ib -> 88 /r
+    {X86::MOV16mi, X86::MOV16mr, DATA_16, 5, 2},   // 66 C7 /0 iw -> 66 89 /r
+    {X86::MOV32mi, X86::MOV32mr, DATA_32, 5, 4},   // C7 /0 id -> 89 /r
+    {X86::MOV64mi32, X86::MOV64mr, DATA_64, 5, 4}, // RX C7 /0 id -> RX 89 /r
+    {X86::ADD8mi, X86::ADD8mr, DATA_8, 5, 1},      // 80 /0 ib -> 00 /r
+    {X86::ADD16mi, X86::ADD16mr, DATA_16, 5, 2},   // 66 81 /0 iw -> 66 01 /r
+    {X86::ADD32mi, X86::ADD32mr, DATA_32, 5, 4},   // 81 /0 id -> 01 /r
+    {X86::ADD64mi32, X86::ADD64mr, DATA_64, 5, 4}, // RX 81 /0 id -> RX 01 /r
+    {X86::ADD16mi8, X86::ADD16mr, DATA_16, 5, 1},  // 66 83 /0 ib -> 66 01 /r
+    {X86::ADD32mi8, X86::ADD32mr, DATA_32, 5, 1},  // 83 /0 ib -> 01 /r
+    {X86::ADD64mi8, X86::ADD64mr, DATA_64, 5, 1},  // RX 83 /0 ib -> RX 01 /r
+    {X86::ADC8mi, X86::ADC8mr, DATA_8, 5, 1},
+    {X86::ADC16mi, X86::ADC16mr, DATA_16, 5, 2},
+    {X86::ADC32mi, X86::ADC32mr, DATA_32, 5, 4},
+    {X86::ADC64mi32, X86::ADC64mr, DATA_64, 5, 4},
+    // The replacement must be the store form (mr), as in every other row; the
+    // load form (rm) would swap source and destination.
+    {X86::ADC16mi8, X86::ADC16mr, DATA_16, 5, 1},
+    {X86::ADC32mi8, X86::ADC32mr, DATA_32, 5, 1},
+    {X86::ADC64mi8, X86::ADC64mr, DATA_64, 5, 1},
+    {X86::SUB8mi, X86::SUB8mr, DATA_8, 5, 1},
+    {X86::SUB16mi, X86::SUB16mr, DATA_16, 5, 2},
+    {X86::SUB32mi, X86::SUB32mr, DATA_32, 5, 4},
+    {X86::SUB64mi32, X86::SUB64mr, DATA_64, 5, 4},
+    {X86::SUB16mi8, X86::SUB16mr, DATA_16, 5, 1},
+    {X86::SUB32mi8, X86::SUB32mr, DATA_32, 5, 1},
+    {X86::SUB64mi8, X86::SUB64mr, DATA_64, 5, 1},
+    {X86::SBB8mi, X86::SBB8mr, DATA_8, 5, 1},
+    {X86::SBB16mi, X86::SBB16mr, DATA_16, 5, 2},
+    {X86::SBB32mi, X86::SBB32mr, DATA_32, 5, 4},
+    {X86::SBB64mi32, X86::SBB64mr, DATA_64, 5, 4},
+    {X86::SBB16mi8, X86::SBB16mr, DATA_16, 5, 1},
+    {X86::SBB32mi8, X86::SBB32mr, DATA_32, 5, 1},
+    {X86::SBB64mi8, X86::SBB64mr, DATA_64, 5, 1},
+    {X86::AND8mi, X86::AND8mr, DATA_8, 5, 1},
+    {X86::AND16mi, X86::AND16mr, DATA_16, 5, 2},
+    {X86::AND32mi, X86::AND32mr, DATA_32, 5, 4},
+    {X86::AND64mi32, X86::AND64mr, DATA_64, 5, 4},
+    {X86::AND16mi8, X86::AND16mr, DATA_16, 5, 1},
+    {X86::AND32mi8, X86::AND32mr, DATA_32, 5, 1},
+    {X86::AND64mi8, X86::AND64mr, DATA_64, 5, 1},
+    {X86::OR8mi, X86::OR8mr, DATA_8, 5, 1},
+    {X86::OR16mi, X86::OR16mr, DATA_16, 5, 2},
+    {X86::OR32mi, X86::OR32mr, DATA_32, 5, 4},
+    {X86::OR64mi32, X86::OR64mr, DATA_64, 5, 4},
+    {X86::OR16mi8, X86::OR16mr, DATA_16, 5, 1},
+    {X86::OR32mi8, X86::OR32mr, DATA_32, 5, 1},
+    {X86::OR64mi8, X86::OR64mr, DATA_64, 5, 1},
+    {X86::XOR8mi, X86::XOR8mr, DATA_8, 5, 1},
+    {X86::XOR16mi, X86::XOR16mr, DATA_16, 5, 2},
+    {X86::XOR32mi, X86::XOR32mr, DATA_32, 5, 4},
+    {X86::XOR64mi32, X86::XOR64mr, DATA_64, 5, 4},
+    {X86::XOR16mi8, X86::XOR16mr, DATA_16, 5, 1},
+    {X86::XOR32mi8, X86::XOR32mr, DATA_32, 5, 1},
+    {X86::XOR64mi8, X86::XOR64mr, DATA_64, 5, 1},
+    {X86::CMP8mi, X86::CMP8mr, DATA_8, 5, 1},
+    {X86::CMP16mi, X86::CMP16mr, DATA_16, 5, 2},
+    {X86::CMP32mi, X86::CMP32mr, DATA_32, 5, 4},
+    {X86::CMP64mi32, X86::CMP64mr, DATA_64, 5, 4},
+    {X86::CMP16mi8, X86::CMP16mr, DATA_16, 5, 1},
+    {X86::CMP32mi8, X86::CMP32mr, DATA_32, 5, 1},
+    {X86::CMP64mi8, X86::CMP64mr, DATA_64, 5, 1}};
+unsigned OpCodeTable::TableSize = array_lengthof(OpCodeTable::Table);
+bool OpCodeTable::Sorted = false;
+
+// Returns the encoded size in bytes of a 'mov imm, reg' instruction with the
+// given opcode. Only the four MOVri opcodes listed in the table are accepted.
+static int GetSizeOfMovImmToRegInstr(int OpCode) {
+  if (OpCode == X86::MOV16ri)
+    return 4; // 66 B8 iw
+  if (OpCode == X86::MOV32ri)
+    return 5; // B8 id
+  if (OpCode == X86::MOV64ri)
+    return 10; // RX B8 io
+  if (OpCode == X86::MOV64ri32)
+    return 7; // RX C7 /0 id
+  llvm_unreachable("Not mov imm->reg instruction");
+}
+
+// Returns the sub-register index that selects a part of size 'Part' from a
+// register of size 'Entire', or NoSubRegister when the sizes are equal.
+static unsigned GetSubreg(ImmediateSize Entire, ImmediateSize Part) {
+  if (Entire == Part)
+    return X86::NoSubRegister;
+  switch (Part) {
+  case DATA_8:
+    return X86::sub_8bit;
+  case DATA_16:
+    return X86::sub_16bit;
+  case DATA_32:
+    return X86::sub_32bit;
+  default:
+    llvm_unreachable("Invalid size");
+  }
+}
+
+// Same as GetSubreg, but both sizes are given in bytes.
+static unsigned GetSubregByBytes(unsigned Entire, unsigned Part) {
+  if (Entire == Part)
+    return X86::NoSubRegister;
+  switch (Part) {
+  case 1:
+    return X86::sub_8bit;
+  case 2:
+    return X86::sub_16bit;
+  case 4:
+    return X86::sub_32bit;
+  default:
+    llvm_unreachable("Invalid size");
+  }
+}
+
+// Possible results of the test if 64-bit immediate value can be represented
+// as extended 32-bit value.
+enum ExtensionKind {
+  NoExtension,   // cannot be represented as extension of 32 bit
+  ZeroExtension, // 64-bit value = ZExt(32-bit value)
+  SignExtension  // 64-bit value = SExt(32-bit value)
+};
+
+// Pair of immediate value and instruction that use it.
+//
+// This type is used for temporary values which are not placed into
+// history yet. Both members are unset until Init() is called.
+class ImmediateValueUse {
+  int64_t Value;
+  const InstrInfo *Info;
+
+public:
+  void Init(int64_t V, const InstrInfo *II) {
+    assert(II);
+    Value = V;
+    Info = II;
+  }
+
+  const InstrInfo &GetInstrInfo() const { return *Info; }
+  int64_t GetValue() const { return Value; }
+  ImmediateSize GetSize() const { return Info->Size; }
+  int GetOperand() const { return Info->OperandNo; }
+
+  // Low-order bits of the value, truncated to the named width.
+  uint32_t Get32bits() const { return (uint32_t)Value; }
+  uint16_t Get16bits() const { return (uint16_t)Value; }
+  uint8_t Get8bits() const { return (uint8_t)Value; }
+};
+
+// Represents use of an immediate value in history.
+//
+// Immediate value in history is kept separately, so the use contains only
+// info about using instruction.
+class RecordedUse {
+  InstrIterator Instruction; // that uses the value
+  unsigned SerialNumber;     // of the instruction
+  const InstrInfo *IInfo;    // Associated info about imm usage
+
+public:
+  // Fills the record; members are unset before the first call.
+  void Init(InstrIterator I, unsigned SN, const InstrInfo &II) {
+    Instruction = I;
+    SerialNumber = SN;
+    IInfo = &II;
+  }
+
+  InstrIterator GetInstr() const { return Instruction; }
+  unsigned GetSN() const { return SerialNumber; }
+  // The accessors below forward to the table row of the using instruction.
+  int GetProfit() const { return IInfo->Profit; }
+  ImmediateSize GetSize() const { return IInfo->Size; }
+  int GetOperand() const { return IInfo->OperandNo; }
+  int GetNewOpCode() const { return IInfo->NewOpCode; }
+  bool IsLoad() const { return IInfo->IsLoad; }
+};
+
+// Represents an immediate value in history.
+struct RecordedValue {
+
+  // Iterating through value is iterating through its uses.
+  typedef RecordedUse *Iterator;
+  Iterator begin() { return Uses; }
+  Iterator end() { return Uses + TotalUses; }
+
+  // Enumerates possible states of a value slot in history.
+  enum ValueState {
+    Free,      // Does not track any immediate value
+    Used,      // Value is not profitable to materialize yet
+    Waiting,   // Is profitable, but no registers are available
+    Scheduled, // Selected for materialization
+    InReg,     // Materialization in progress
+  };
+
+  ValueState State;
+  int64_t Value;
+  unsigned TotalUses;                // Number of recorded uses
+  RecordedUse Uses[MaxHistoryDepth]; // Recorded uses of this value
+  unsigned SizeCnt[DATA_Total];      // Counts uses of different size
+  InstrIterator FirstInstruction;    // that uses the immediate value
+  bool HasLoad;                      // True if LoadInstruction is set
+  Iterator LoadInstruction;          // Instr like MOV32ri, if exists;
+                                     // points into Uses
+  int Profit;                        // Gain in size (bytes)
+  unsigned ReplacedCnt;              // Counts transformations imm->reg
+  unsigned Register;                 // If the value is cached in reg
+  ImmediateSize RegSize;             // Size of allocated register
+  bool UsesREX;                      // Register requires prefix to access
+
+  // State query
+  bool IsFree() const { return State == Free; }
+  bool IsUsed() const { return State == Used; }
+  bool IsWaiting() const { return State == Waiting; }
+  bool IsInReg() const { return State == InReg; }
+  bool IsScheduled() const { return State == Scheduled; }
+  // Full means the run-time limit HistoryDepth is reached, which may be lower
+  // than the array capacity MaxHistoryDepth.
+  bool IsFull() const { return TotalUses == HistoryDepth; }
+
+  // State change
+  void MarkWaiting() {
+    assert(State == Used || State == Waiting);
+    State = Waiting;
+  }
+  // Selects the value for materialization; no register is assigned yet.
+  void MarkSchedule() {
+    assert(State == Used || State == Waiting);
+    State = Scheduled;
+    Register = 0;
+  }
+  // Records the register that now holds the value.
+  void MarkInReg(int Reg, bool UseREX) {
+    assert(State == Scheduled);
+    assert(Reg != 0);
+    State = InReg;
+    Register = Reg;
+    UsesREX = UseREX;
+  }
+  // Only the state changes; the remaining fields are reset by Init().
+  void MarkFree() { State = Free; }
+
+  int64_t GetValue() const { return Value; }
+  // Width of the widest use recorded so far (see NewUse).
+  ImmediateSize GetSize() const { return RegSize; }
+
+  // Low-order bits of the tracked value, truncated to the named width.
+  uint32_t Get32bits() const {
+    assert(State != Free);
+    return (uint32_t)Value;
+  }
+  uint16_t Get16bits() const {
+    assert(State != Free);
+    return (uint16_t)Value;
+  }
+  uint8_t Get8bits() const {
+    assert(State != Free);
+    return (uint8_t)Value;
+  }
+
+  // Tells whether the 64-bit value equals the zero- or sign-extension of its
+  // low 32 bits. Returns NoExtension when no 64-bit use has been recorded.
+  ExtensionKind ExtensionOf32() const {
+    if (SizeCnt[DATA_64] == 0)
+      return NoExtension;
+    if ((int64_t)(Get32bits()) == Value)
+      return ZeroExtension;
+    if ((int64_t)(int32_t)Get32bits() == Value)
+      return SignExtension;
+    return NoExtension;
+  }
+
+  // Returns size in bytes of the instruction that loads immediate value into
+  // a register. On 64 bit target the size is calculated for loading into a
+  // register that does not require REX prefix. If the instruction anyway
+  // requires REX prefix, the argument WithREX is set to 'true'.
+  int GetLoadInstructionSize(bool &WithREX) const {
+    WithREX = false;
+    if (SizeCnt[DATA_64]) {
+      switch (ExtensionOf32()) {
+      case ZeroExtension:
+        return 5; // B8 id
+      case SignExtension:
+        WithREX = true;
+        return 7; // REX C7 /0 id
+      default:
+        WithREX = true;
+        return 10; // REX B8 io
+      }
+    }
+    if (SizeCnt[DATA_32])
+      return 5; // B8 id
+    if (SizeCnt[DATA_16])
+      return 4; // 66 B8 iw
+    return 2;   // B0 ib
+  }
+
+  // Returns opcode of instruction that loads immediate value into register.
+  // The choice mirrors the cases of GetLoadInstructionSize.
+  int GetLoadInstructionOpcode() const {
+    if (SizeCnt[DATA_64]) {
+      switch (ExtensionOf32()) {
+      case ZeroExtension:
+        return X86::MOV32ri64;
+      case SignExtension:
+        return X86::MOV64ri32;
+      default:
+        return X86::MOV64ri;
+      }
+    }
+    if (SizeCnt[DATA_32])
+      return X86::MOV32ri;
+    if (SizeCnt[DATA_16])
+      return X86::MOV16ri;
+    return X86::MOV8ri;
+  }
+
+  // Returns gain in size if this value is moved to a register.
+  // If materialization requires 32 bit or shorter register, the profit is
+  // calculated for registers that don't require REX prefix to access.
+  // The result is negative while the recorded uses save less than the load
+  // instruction costs.
+  int GetProfit() {
+    bool WithREX;
+    return Profit - GetLoadInstructionSize(WithREX);
+  }
+
+  // Returns number of instructions the value history spans over.
+  unsigned Length() {
+    assert(State != Free);
+    return LastUse().GetSN() - FirstUse().GetSN();
+  }
+
+  // Most recently recorded use. At least one use must be recorded.
+  RecordedUse &LastUse() {
+    assert(State != Free);
+    return Uses[TotalUses - 1];
+  }
+
+  // Oldest recorded use. At least one use must be recorded.
+  RecordedUse &FirstUse() {
+    assert(State != Free);
+    return Uses[0];
+  }
+
+  // Starts tracking value V in a free slot; the slot has no uses afterwards.
+  void Init(int64_t V) {
+    assert(State == Free);
+    State = Used;
+    TotalUses = 0;
+    for (unsigned I = 0; I < DATA_Total; ++I)
+      SizeCnt[I] = 0;
+    Value = V;
+    RegSize = DATA_8;
+    Register = 0;
+    Profit = 0;
+    ReplacedCnt = 0;
+    HasLoad = false;
+  }
+
+  // Creates new use of the value in history.
+  void NewUse(InstrIterator I, unsigned SN, const InstrInfo &IInfo) {
+    assert(!IsFree());
+    assert(TotalUses < HistoryDepth);
+    Iterator NewUseItem = end();
+    ++TotalUses;
+    NewUseItem->Init(I, SN, IInfo);
+    Profit += IInfo.Profit;
+    ++SizeCnt[IInfo.Size];
+    if (IInfo.Size > RegSize)
+      RegSize = IInfo.Size;
+
+    if (IInfo.IsLoad) {
+      // If the instruction loads immediate into register, it may be moved
+      // ahead in basic block. No additional load instruction is needed in
+      // this case.
+      if (!HasLoad || LoadInstruction->GetSize() <= IInfo.Size) {
+        HasLoad = true;
+        LoadInstruction = &LastUse();
+      }
+    }
+  }
+
+  // Removes oldest uses in the value history.
+  //
+  // Dropping every use frees the slot. Otherwise the surviving uses are moved
+  // to the front of the array and the accumulated profit is reduced by the
+  // profit of the dropped uses. SizeCnt and RegSize are left as they are, so
+  // they may describe a wider value than the surviving uses need.
+  void DropOldestUses(unsigned NumOfDropped) {
+    assert(IsUsed() || IsWaiting());
+    assert(NumOfDropped <= TotalUses);
+    NumMatDropped += NumOfDropped;
+    if (NumOfDropped == TotalUses) {
+      State = Free;
+      return;
+    }
+    int ProfitOfDropped = 0;
+    for (unsigned I = 0; I < NumOfDropped; ++I)
+      ProfitOfDropped += Uses[I].GetProfit();
+    Profit -= ProfitOfDropped;
+
+    // Move exactly the surviving uses. Copying (TotalUses - 1) elements would
+    // read past the end of the array whenever more than one use is dropped.
+    const unsigned Remaining = TotalUses - NumOfDropped;
+    memmove(Uses, Uses + NumOfDropped, sizeof(RecordedUse) * Remaining);
+    TotalUses = Remaining;
+
+    // LoadInstruction points into Uses, so compaction invalidated it (the
+    // load may even be among the dropped uses). Re-select it among the
+    // survivors with the same rule NewUse applies.
+    HasLoad = false;
+    for (Iterator U = begin(), E = end(); U != E; ++U) {
+      if (!U->IsLoad())
+        continue;
+      if (!HasLoad || LoadInstruction->GetSize() <= U->GetSize()) {
+        HasLoad = true;
+        LoadInstruction = U;
+      }
+    }
+  }
+};
+
+// Keeps track of immediate value usages.
+class ImmHistory {
+public:
+  // Iterating through history is iterating through values kept in it.
+  typedef RecordedValue *Iterator;
+  Iterator begin() { return Immediates; }
+  // The iterated range is bounded by the run-time option MaxImmediates, not
+  // by the array capacity MaxHistoryWidth.
+  Iterator end() { return Immediates + MaxImmediates; }
+
+private:
+  RecordedValue Immediates[MaxHistoryWidth];
+  unsigned TotalTrackedValues;
+  unsigned CurrentSN; // Counts 'time' in BB
+
+  // Finds a slot for the new value and initializes it.
+  // Returns iterator pointing to the slot.
+  //
+  // A slot qualifies if it is free, or if it holds an expired value that is
+  // not scheduled for or held in a register; such a value is dropped first.
+  // NOTE(review): the parameter SN is not used in the body.
+  Iterator CreateValue(unsigned SN, ImmediateValueUse &Info) {
+    assert(TotalTrackedValues <= MaxImmediates);
+    Iterator Result = begin();
+    for (Iterator E = end(); Result != E; ++Result) {
+      if (Result->IsFree())
+        break;
+      if ((Result->IsUsed() || Result->IsWaiting()) && IsExpired(*Result)) {
+        Drop(*Result);
+        break;
+      }
+    }
+    // NOTE(review): without assertions a full history makes the code below
+    // write through end() - confirm callers can never get here.
+    assert(Result != end() && "Cannot find free value slot");
+    Result->Init(Info.GetValue());
+    ++TotalTrackedValues;
+    return Result;
+  }
+
+public:
+  ImmHistory() { Clear(); }
+
+  // Frees every slot and restarts the instruction counter.
+  void Clear() {
+    for (Iterator I = begin(), E = end(); I != E; ++I)
+      I->MarkFree();
+    TotalTrackedValues = 0;
+    CurrentSN = 0;
+  }
+
+  // Moves 'time' forward by one instruction.
+  void Advance() { ++CurrentSN; }
+
+  // Returns 'true' if the value is expired, i.e. at least MaxSeparatingInstrs
+  // instructions have been counted since its last recorded use. The value
+  // must not be free.
+  bool IsExpired(RecordedValue &Value) {
+    return (CurrentSN - Value.LastUse().GetSN()) >= MaxSeparatingInstrs;
+  }
+
+  // Searches history for the specified value.
+  //
+  // Returns pointer to the value descriptor if the value referenced by the
+  // given instruction is found in the history, otherwise returns null
+  // pointer. If exact match (value and size) is not found, but history
+  // contains a value that could be represented as a subreg of the value
+  // sought, returns pointer to that value.
+  //
+  // As a side effect, expired values in state Used are dropped on the way.
+  RecordedValue *FindValue(ImmediateValueUse &Info) {
+    for (RecordedValue *I = begin(); I != end(); ++I) {
+      if (I->IsFree())
+        continue;
+      if (I->IsUsed() && IsExpired(*I)) {
+        Drop(*I);
+        continue;
+      }
+
+      // Look for the value that is of the same size or can be truncated to
+      // the value looked for.
+      // The comparison is made at the width of the use being looked up.
+      if (Info.GetSize() <= I->GetSize()) {
+        switch (Info.GetSize()) {
+        case DATA_64:
+          if (I->GetValue() == Info.GetValue())
+            return I;
+          break;
+        case DATA_32:
+          if (I->Get32bits() == Info.Get32bits())
+            return I;
+          break;
+        case DATA_16:
+          if (I->Get16bits() == Info.Get16bits())
+            return I;
+          break;
+        case DATA_8:
+          if (I->Get8bits() == Info.Get8bits())
+            return I;
+          break;
+        default:
+          llvm_unreachable("Invalid data size");
+        }
+      }
+
+      // Is there a short value that can be extended to the specified?
+      if (I->IsInReg()) {
+        // Values that are already in a register cannot be extended.
+        continue;
+      } else if (Info.GetSize() > I->GetSize()) {
+        // Here the comparison is made at the width of the tracked value,
+        // which is the narrower of the two.
+        Iterator Found = end();
+        switch (I->GetSize()) {
+        case DATA_32:
+          if (I->Get32bits() == Info.Get32bits())
+            Found = I;
+          break;
+        case DATA_16:
+          if (I->Get16bits() == Info.Get16bits())
+            Found = I;
+          break;
+        case DATA_8:
+          if (I->Get8bits() == Info.Get8bits())
+            Found = I;
+          break;
+        default:
+          llvm_unreachable("Invalid data size");
+        }
+        if (Found != end())
+          return Found;
+      }
+    }
+    return nullptr;
+  }
+
+  // Registers the new use of the immediate value.
+  //
+  // ValPtr is the history entry to extend, or null to start tracking a new
+  // value. Returns the entry that received the use.
+  RecordedValue &AddUse(RecordedValue *ValPtr, InstrIterator I,
+                        ImmediateValueUse &Info) {
+    if (ValPtr == nullptr) {
+      // New immediate value
+      ValPtr = CreateValue(CurrentSN, Info);
+      ValPtr->NewUse(I, CurrentSN, Info.GetInstrInfo());
+    } else {
+      // The new use is added to existing value
+      if (ValPtr->GetSize() < Info.GetSize()) {
+        // The value in history is a subreg of the added one, need to
+        // widen it.
+        ValPtr->Value = Info.GetValue();
+      }
+      if (ValPtr->IsInReg()) {
+        // If the value is already in register, keep only the last use.
+        // The single slot is overwritten in place; Profit and SizeCnt are
+        // not updated on this path.
+        assert(ValPtr->TotalUses == 1);
+        ValPtr->FirstUse().Init(I, CurrentSN, Info.GetInstrInfo());
+      } else {
+        // Not in register yet
+        assert(!ValPtr->IsScheduled() || !ValPtr->IsFull());
+        // Make room by forgetting the oldest use when the history is full.
+        if (ValPtr->IsFull())
+          ValPtr->DropOldestUses(1);
+        assert(!ValPtr->IsFull());
+        ValPtr->NewUse(I, CurrentSN, Info.GetInstrInfo());
+      }
+    }
+    return *ValPtr;
+  }
+
+  // Removes the specified value from history.
+  // The value must not be held in a register.
+  void Drop(RecordedValue &ImmV) {
+    assert(TotalTrackedValues > 0);
+    assert(!ImmV.IsInReg());
+    ImmV.MarkFree();
+    --TotalTrackedValues;
+  }
+};
+
+// The pass transforms sequences of instructions that use the same immediate
+// value into equivalent sequence that uses that value loaded into a register.
+struct MaterializeImmediates : public MachineFunctionPass {
+  static char ID;
+  ImmHistory History;            // Per-basic-block usage history
+  unsigned TotalUsedRegs;        // Values scheduled for or held in registers
+  const TargetInstrInfo *TII;    // Set in runOnMachineFunction
+  MachineRegisterInfo *MRegInfo; // Set in runOnMachineFunction
+  bool Mode64Bit;                // Subtarget has the Mode64Bit feature
+  const X86RegisterInfo *RegInfo; // Set in runOnMachineFunction
+
+  // The opcode table must be sorted before the first lookup.
+  MaterializeImmediates() : MachineFunctionPass(ID) { OpCodeTable::Sort(); }
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual const char *getPassName() const {
+    return "X86 constant materializer";
+  }
+
+  bool CanBeTransformed(MachineInstr &Instr, ImmediateValueUse &Info);
+  bool UseCanBeAddedToValue(RecordedValue &ImmV, ImmediateValueUse &Info);
+  bool CanMaterialize(RecordedValue &ImmV);
+  const TargetRegisterClass *
+  GetCommonClass(const TargetRegisterClass *ImmRegClass,
+                 const TargetRegisterClass *LoadRegClass);
+  void ProcessExpiredValue(bool Finish);
+  void ScheduleForMaterialization(RecordedValue &ImmV);
+  void StartMaterialization(RecordedValue &ImmV);
+  void MaterializeValues(RecordedValue &ImmV);
+  unsigned FinishMaterialization(RecordedValue &ImmV);
+};
+
+char MaterializeImmediates::ID = 0;
+}
+
+// Runs the transformation over every basic block of the function. Returns
+// 'true' if at least one immediate value was materialized.
+bool MaterializeImmediates::runOnMachineFunction(MachineFunction &MF) {
+  MRegInfo = &MF.getRegInfo();
+  const TargetSubtargetInfo &STI = MF.getSubtarget();
+  TII = STI.getInstrInfo();
+  Mode64Bit =
+      (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+  // NOTE(review): the cast's template argument was missing in the text under
+  // review; it is restored from the declared type of RegInfo.
+  RegInfo = static_cast<const X86RegisterInfo *>(STI.getRegisterInfo());
+  unsigned StartNumMaterialized = NumMatImmediates;
+
+  for (MachineBasicBlock &MBB : MF) {
+    // History and the register budget never cross a basic block boundary.
+    History.Clear();
+    TotalUsedRegs = 0;
+    for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
+      // Step to the next instruction before touching the current one.
+      // Materialization may unlink the current instruction from the block (a
+      // 'mov imm, reg' whose register is replaced), after which an iterator
+      // still pointing at it can no longer be advanced.
+      InstrIterator Cur = MII++;
+
+      History.Advance();
+      // Time is advanced and some values kept in the history may need removal.
+      // Do this check if we know that some value need materialization, thus we
+      // can speed up the pass. Values that are expired and do not need moving
+      // to register are dropped when we look for a slot for a new value.
+      if (TotalUsedRegs)
+        ProcessExpiredValue(false);
+
+      // Process the current instruction.
+      ImmediateValueUse Info;
+      if (CanBeTransformed(*Cur, Info)) {
+        RecordedValue *ValPtr = History.FindValue(Info);
+        if (ValPtr && !UseCanBeAddedToValue(*ValPtr, Info))
+          continue;
+        RecordedValue &ImmV = History.AddUse(ValPtr, Cur, Info);
+        if (ImmV.IsInReg()) {
+          // The value already lives in a register: rewrite this use now.
+          MaterializeValues(ImmV);
+        } else if (ImmV.IsScheduled()) {
+          // Rewriting is postponed until the use history fills up.
+          if (ImmV.IsFull())
+            StartMaterialization(ImmV);
+        } else if (CanMaterialize(ImmV)) {
+          if (TotalUsedRegs < MaxRegisters)
+            ScheduleForMaterialization(ImmV);
+          else
+            ImmV.MarkWaiting();
+        }
+      }
+    }
+
+    // Finish pending materialization requests.
+    while (TotalUsedRegs)
+      ProcessExpiredValue(true);
+  }
+  return NumMatImmediates > StartNumMaterialized;
+}
+
+// Checks if the given machine instruction uses immediate value and it is
+// possible to transform it into a form that uses register instead. If so,
+// initializes argument 'Info' with respective information.
+bool MaterializeImmediates::CanBeTransformed(MachineInstr &Instr,
+                                             ImmediateValueUse &Info) {
+  InstrInfo *Rec = OpCodeTable::Find(Instr.getOpcode());
+  if (!Rec)
+    return false;
+  assert(Rec->Opcode == Instr.getOpcode());
+
+  // Skip things like addresses represented by immediate values.
+  if (!Instr.getOperand(Rec->OperandNo).isImm())
+    return false;
+
+  if (Rec->IsLoad) {
+    // A 'mov imm, reg' whose result has a single use is left alone.
+    unsigned LoadedReg = Instr.getOperand(0).getReg();
+    if (MRegInfo->hasOneUse(LoadedReg))
+      return false;
+  }
+
+  Info.Init(Instr.getOperand(Rec->OperandNo).getImm(), Rec);
+  return true;
+}
+
+// If immediate value is already present in history, this method decides, if
+// the instruction specified by argument 'Info' can be added to uses of the
+// value.
+bool MaterializeImmediates::UseCanBeAddedToValue(RecordedValue &ImmV,
+                                                 ImmediateValueUse &Info) {
+  // If a value is already in a register that requires REX, replacing byte
+  // immediates does not give gain in size.
+  if (ImmV.IsInReg() && Mode64Bit && ImmV.UsesREX && Info.GetSize() == DATA_8)
+    return false;
+  return true;
+}
+
+// Checks if it is profitable to put the immediate value into a register.
+bool MaterializeImmediates::CanMaterialize(RecordedValue &ImmV) {
+  assert(!ImmV.IsFree());
+  // GetProfit() is negative while the recorded uses save less than the load
+  // costs. Compare as signed: against an unsigned threshold a negative profit
+  // would convert to a huge value and always pass.
+  // NOTE(review): assumes MinProfit is an unsigned option - confirm.
+  const int Threshold = static_cast<int>(MinProfit);
+  return ImmV.GetProfit() >= Threshold;
+}
+
+// Returns a register class usable both for the register that holds the
+// immediate (ImmRegClass) and for the register written by an existing load
+// instruction (LoadRegClass), or null if no such class is found.
+const TargetRegisterClass *
+MaterializeImmediates::GetCommonClass(const TargetRegisterClass *ImmRegClass,
+                                      const TargetRegisterClass *LoadRegClass) {
+  const TargetRegisterClass *CommonRC = nullptr;
+  if (ImmRegClass->getSize() == LoadRegClass->getSize()) {
+    // Same width: take whichever class is contained in the other.
+    if (LoadRegClass->hasSuperClassEq(ImmRegClass))
+      CommonRC = LoadRegClass;
+    else if (LoadRegClass->hasSubClass(ImmRegClass))
+      CommonRC = ImmRegClass;
+  } else if (LoadRegClass->getSize() < ImmRegClass->getSize()) {
+    // The load writes a narrower register: look for a class of the wider
+    // width whose sub-registers belong to the load's class.
+    unsigned SubReg =
+        GetSubregByBytes(ImmRegClass->getSize(), LoadRegClass->getSize());
+    if (const TargetRegisterClass *MRC = RegInfo->getMatchingSuperRegClass(
+            ImmRegClass, LoadRegClass, SubReg)) {
+      CommonRC = MRC;
+    }
+  }
+  return CommonRC;
+}
+
+// Try to find a value that needs to be removed from the history and finish
+// its materialization.
+void MaterializeImmediates::ProcessExpiredValue(bool Finish) {
+  // Find a value that should be removed from history.
+  // With 'Finish' set every tracked value qualifies, otherwise only expired
+  // ones do. The scan stops at the first value that actually occupied a
+  // register.
+  unsigned LastInstructionSN = 0; // SN of the last instruction that used reg
+  for (auto &Value : History) {
+    if (!Value.IsFree() && (Finish || History.IsExpired(Value))) {
+      // The value need to be removed from history.
+      if (Value.IsScheduled())
+        // The value is marked for materialization, but register was not
+        // allocated to it yet.
+        StartMaterialization(Value);
+      if (Value.IsInReg())
+        LastInstructionSN = FinishMaterialization(Value);
+      if (!Value.IsFree())
+        History.Drop(Value);
+      if (LastInstructionSN)
+        break;
+    }
+  }
+
+  if (LastInstructionSN == 0) // No value is processed
+    return;
+
+  // There may be a value that can be moved to register (as it is profitable)
+  // but it has not due to lack of registers. Now a register become available
+  // and such value can be cached.
+  for (auto &Value : History) {
+    if (Value.IsWaiting()) {
+      // Scan recorded uses until we found a use that occurs after a register
+      // becomes available. All preceding uses are discarded.
+      unsigned NumOfDropped = 0;
+      for (auto &Use : Value) {
+        if (Use.GetSN() < LastInstructionSN)
+          ++NumOfDropped;
+      }
+      Value.DropOldestUses(NumOfDropped);
+      if (Value.IsFree()) {
+        // DropOldestUses released the slot by itself when no use survived.
+        // Let the history account for it; Drop only adjusts the number of
+        // tracked values here.
+        History.Drop(Value);
+        continue;
+      }
+      // The value is still in state Waiting at this point. Testing for
+      // IsUsed() here would skip every waiting value and leave the code
+      // below unreachable.
+      // Several values may be waiting while only one register was released,
+      // so re-check the register budget before scheduling.
+      if (TotalUsedRegs < MaxRegisters && CanMaterialize(Value)) {
+        ScheduleForMaterialization(Value);
+        StartMaterialization(Value);
+      } else if (Finish)
+        History.Drop(Value);
+    }
+  }
+}
+
+// Marks the value as ready for loading to a register, but does not change
+// instructions. Actual replacement is postponed until history is full or the
+// value is to be removed from tracking.
+void MaterializeImmediates::ScheduleForMaterialization(RecordedValue &ImmV) {
+  ImmV.MarkSchedule();
+  assert(TotalUsedRegs < MaxRegisters);
+  // Remember where the load has to be inserted: before the oldest use.
+  ImmV.FirstInstruction = ImmV.FirstUse().GetInstr();
+  ++TotalUsedRegs;
+}
+
+// Replaces the instructions that use the specified immediate value by their
+// variants that use register.
+// After this call any subsequent instruction that
+// uses the same immediate value is not put into history, but is immediately
+// transformed into the form that uses register.
+//
+// If the final cost estimate shows no gain, the value is dropped from history
+// instead and the reserved register slot is released.
+void MaterializeImmediates::StartMaterialization(RecordedValue &ImmV) {
+  assert(ImmV.IsScheduled());
+
+  // Determine actual immediate size and choose appropriate register class
+  // for it.
+  bool AlwaysUseREX;
+  int LoadInstructionSize = ImmV.GetLoadInstructionSize(AlwaysUseREX);
+  bool REXMayBeUsed = AlwaysUseREX;
+  const TargetRegisterClass *RegClass;
+  if (AlwaysUseREX) {
+    // 64-bit value that is not the zero-extension of its low 32 bits; the
+    // load instruction itself needs REX.
+    RegClass = &X86::GR64RegClass;
+  } else if (ImmV.SizeCnt[DATA_64] != 0) {
+    // 64-bit value, loaded as zext'ed 32-bit
+    if (ImmV.SizeCnt[DATA_8] == 0) {
+      RegClass = &X86::GR64RegClass;
+      REXMayBeUsed = true;
+    } else {
+      // Replacement MOV8mi -> MOV8mr is not profitable if register requires REX
+      RegClass = &X86::GR64_NOREXRegClass;
+    }
+  } else if (ImmV.SizeCnt[DATA_32] != 0) {
+    // NOTE(review): REXMayBeUsed is also set when !Mode64Bit, which adds one
+    // byte to the cost estimate below - confirm this is intended.
+    if (!Mode64Bit || ImmV.SizeCnt[DATA_8] == 0) {
+      RegClass = &X86::GR32RegClass;
+      REXMayBeUsed = true;
+    } else {
+      RegClass = &X86::GR32_NOREXRegClass;
+    }
+  } else if (ImmV.SizeCnt[DATA_16] != 0) {
+    RegClass = &X86::GR16_NOREXRegClass;
+  } else { // 8-bit value
+    RegClass = &X86::GR8_NOREXRegClass;
+  }
+
+  // If use set of the value contains load instruction, expenses for load
+  // instruction can be decreased.
+  if (ImmV.HasLoad) {
+    MachineInstr &LoadInstr = *ImmV.LoadInstruction->GetInstr();
+    unsigned LoadedReg = LoadInstr.getOperand(0).getReg();
+    const TargetRegisterClass *LoadedRC = MRegInfo->getRegClass(LoadedReg);
+    const TargetRegisterClass *CommonRC = GetCommonClass(RegClass, LoadedRC);
+    if (CommonRC) {
+      RegClass = CommonRC;
+      // Same width: the new load costs nothing extra. Narrower existing
+      // load: only the difference in size is paid.
+      if (RegClass->getSize() == LoadedRC->getSize())
+        LoadInstructionSize = 0;
+      else
+        LoadInstructionSize -= GetSizeOfMovImmToRegInstr(LoadInstr.getOpcode());
+    }
+  }
+
+  // Assume the worst case - REX is used if it may be used.
+ if (REXMayBeUsed) + ++LoadInstructionSize; + if (ImmV.Profit <= LoadInstructionSize) { + assert(TotalUsedRegs > 0); + --TotalUsedRegs; + History.Drop(ImmV); + return; + } + + // Keep info about allocated register. + ImmV.MarkInReg(MRegInfo->createVirtualRegister(RegClass), REXMayBeUsed); + ImmV.Profit -= LoadInstructionSize; + + // Insert load instruction + int LoadOpCode = ImmV.GetLoadInstructionOpcode(); + const MCInstrDesc &IDescr = TII->get(LoadOpCode); + InstrIterator FirstInstr = ImmV.FirstInstruction; + BuildMI(*FirstInstr->getParent(), FirstInstr, FirstInstr->getDebugLoc(), + IDescr, ImmV.Register).addImm(ImmV.Value); + + // Go through the value uses and replace immediates with register. + MaterializeValues(ImmV); +} + +// Replaces the specified use of immediate value with register use. +void MaterializeImmediates::MaterializeValues(RecordedValue &ImmV) { + assert(ImmV.IsInReg()); + + for (auto &Use : ImmV) { + MachineInstr &Instr = *Use.GetInstr(); + unsigned SubReg = GetSubreg(ImmV.RegSize, Use.GetSize()); + + if (Use.IsLoad()) { + unsigned LoadedReg = Instr.getOperand(0).getReg(); + + // Class of the register used to store immediate must be a subclass + // of the loaded register class, or its super register class. + const TargetRegisterClass *LoadedRC = MRegInfo->getRegClass(LoadedReg); + const TargetRegisterClass *ImmRC = MRegInfo->getRegClass(ImmV.Register); + const TargetRegisterClass *CommonRC = GetCommonClass(ImmRC, LoadedRC); + if (!CommonRC) + continue; + if (CommonRC != ImmRC) + MRegInfo->setRegClass(ImmV.Register, CommonRC); + + // Replace all uses of the loaded register with the register used for + // immediate value. 
+ unsigned LoadSize = LoadedRC->getSize(); + unsigned ImmSize = SizeInBytes(ImmV.GetSize()); + assert(LoadSize <= ImmSize); + MRegInfo->replaceRegWith(LoadedReg, ImmV.Register); + Instr.removeFromParent(); + } else { + const MCInstrDesc &UseInstr = TII->get(Use.GetNewOpCode()); + Instr.setDesc(UseInstr); + Instr.getOperand(Use.GetOperand()).ChangeToRegister(ImmV.Register, false); + Instr.getOperand(Use.GetOperand()).setSubReg(SubReg); + } + } + ImmV.ReplacedCnt += ImmV.TotalUses; + ImmV.Uses[0] = ImmV.LastUse(); + ImmV.TotalUses = 1; +} + +// Prepares the immediate value for removing from history. +unsigned MaterializeImmediates::FinishMaterialization(RecordedValue &Value) { + assert(Value.IsInReg()); + unsigned LastInstructionSN = Value.LastUse().GetSN(); + Value.MarkFree(); + History.Drop(Value); + + // Update statistics + ++NumMatImmediates; + NumMatBytes += Value.Profit; + NumMatInstructions += Value.ReplacedCnt; + --TotalUsedRegs; + + return LastInstructionSN; +} + +FunctionPass *llvm::createX86MaterializeImmediates() { + if (MaxImmediates && MaxImmediates > MaxHistoryWidth) + MaxImmediates = MaxHistoryWidth; + if (HistoryDepth && HistoryDepth > MaxHistoryDepth) + HistoryDepth = MaxHistoryDepth; + if (MaxSeparatingInstrs && MaxSeparatingInstrs > MaxHistoryWidth) + MaxSeparatingInstrs = MaxHistoryWidth; + return new MaterializeImmediates(); +} Index: lib/Target/X86/X86TargetMachine.cpp =================================================================== --- lib/Target/X86/X86TargetMachine.cpp +++ lib/Target/X86/X86TargetMachine.cpp @@ -129,7 +129,9 @@ } bool X86PassConfig::addPreRegAlloc() { - return false; // -print-machineinstr shouldn't print after this. + addPass(createX86MaterializeImmediates()); + return true; +// return false; // -print-machineinstr shouldn't print after this. 
} bool X86PassConfig::addPostRegAlloc() { Index: test/CodeGen/X86/coalescer-commute3.ll =================================================================== --- test/CodeGen/X86/coalescer-commute3.ll +++ test/CodeGen/X86/coalescer-commute3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 6 +; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | grep mov | count 8 %struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } Index: test/CodeGen/X86/fast-isel-x86.ll =================================================================== --- test/CodeGen/X86/fast-isel-x86.ll +++ test/CodeGen/X86/fast-isel-x86.ll @@ -69,12 +69,12 @@ ret void ; CHECK-LABEL: test5: ; Local value area is still there: -; CHECK: movl $42, {{%[a-z]+}} +; CHECK: movl $42, [[IMMREG:%[a-z]+]] ; Fast-ISel's arg push is not here: ; CHECK-NOT: movl $42, (%esp) ; SDag-ISel's arg push: ; CHECK: movl %esp, [[REGISTER:%[a-z]+]] -; CHECK: movl $42, ([[REGISTER]]) +; CHECK: movl [[IMMREG]], ([[REGISTER]]) ; CHECK: movl __imp__test5dllimport } declare dllimport i32 @test5dllimport(i32) Index: test/CodeGen/X86/materialize-imm.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/materialize-imm.ll @@ -0,0 +1,444 @@ +; RUN: llc < %s -mtriple=i686-pc-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-pc-linux-gnu | FileCheck %s + +; // 2 byte move replacement gives zero gain +; void func_01(char* x) { +; *x = 11; +; *(x+3) = 11; +; } +define void @func_01(i8* nocapture %x) { +entry: + store i8 11, i8* %x, align 1 + %add.ptr = getelementptr inbounds i8* %x, i32 3 + store i8 11, i8* %add.ptr, align 1 + ret void +; CHECK-LABEL: func_01: +; CHECK: movb $11, +; CHECK: movb $11, +; CHECK: ret +} + +; // But 3 byte is already profitable +; void func_02(char* x) { +; *x = 11; +; *(x+3) = 11; +; *(x+6) = 11; +; } +define void 
@func_02(i8* nocapture %x) { +entry: + store i8 11, i8* %x, align 1 + %add.ptr = getelementptr inbounds i8* %x, i32 3 + store i8 11, i8* %add.ptr, align 1 + %add.ptr1 = getelementptr inbounds i8* %x, i32 6 + store i8 11, i8* %add.ptr1, align 1 + ret void +; CHECK-LABEL: func_02: +; CHECK: movb $11, %[[REG:[a-zA-Z0-9]+]] +; CHECK: movb %[[REG]], +; CHECK: movb %[[REG]], +; CHECK: movb %[[REG]], +; CHECK: ret +} + +; // 2 32-bit move replacement is profitable +; void func_03(char* x) { +; *((int*)x) = 0x55555555; +; *((int*)x+3) = 0x55555555; +; } +define void @func_03(i8* nocapture %x) { +entry: + %0 = bitcast i8* %x to i32* + store i32 1431655765, i32* %0, align 4 + %add.ptr = getelementptr inbounds i8* %x, i32 12 + %1 = bitcast i8* %add.ptr to i32* + store i32 1431655765, i32* %1, align 4 + ret void +; CHECK-LABEL: func_03: +; CHECK: movl $1431655765, %[[REG:[a-zA-Z0-9]+]] +; CHECK: movl %[[REG]], +; CHECK: movl %[[REG]], +; CHECK: ret +} + +; // 2 16-bit move replacement gives zero gain +; void func_04(char* x) { +; *((short*)x) = 0x5555; +; *((short*)x+3) = 0x5555; +; } +define void @func_04(i8* nocapture %x) { +entry: + %0 = bitcast i8* %x to i16* + store i16 21845, i16* %0, align 2 + %add.ptr = getelementptr inbounds i8* %x, i32 6 + %1 = bitcast i8* %add.ptr to i16* + store i16 21845, i16* %1, align 2 + ret void +; CHECK-LABEL: func_04: +; CHECK: movw $21845, +; CHECK: movw $21845, +; CHECK: ret +} + + +; // but 3 16-bit move replacement is already profitable +; void func_05(char* x) { +; *((short*)x) = 0x5555; +; *((short*)x+3) = 0x5555; +; *((short*)x+9) = 0x5555; +; } +define void @func_05(i8* nocapture %x) { +entry: + %0 = bitcast i8* %x to i16* + store i16 21845, i16* %0, align 2 + %add.ptr = getelementptr inbounds i8* %x, i32 6 + %1 = bitcast i8* %add.ptr to i16* + store i16 21845, i16* %1, align 2 + %add.ptr1 = getelementptr inbounds i8* %x, i32 18 + %2 = bitcast i8* %add.ptr1 to i16* + store i16 21845, i16* %2, align 2 + ret void +; CHECK-LABEL: 
func_05: +; CHECK: movw $21845, %[[REG:[a-zA-Z0-9]+]] +; CHECK: movw %[[REG]], +; CHECK: movw %[[REG]], +; CHECK: ret +} + + +; // 1*16 + 1*8 is not profitable +; void func_06(char* x) { +; *(x) = 0x55; +; *((short*)x+6) = 0x5555; +; } +define void @func_06(i8* nocapture %x) { +entry: + store i8 85, i8* %x, align 1 + %add.ptr = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr to i16* + store i16 21845, i16* %0, align 2 + ret void +; CHECK-LABEL: func_06: +; CHECK: movb $85, +; CHECK: movw $21845, +; CHECK: ret +} + + +; // 1*16 + 2*8 gives zero gain +; void func_06a(char* x) { +; *(x) = 0x55; +; *(x+3) = 0x55; +; *((short*)x+9) = 0x5555; +; } +define void @func_06a(i8* nocapture %x) { +entry: + store i8 85, i8* %x, align 1 + %add.ptr = getelementptr inbounds i8* %x, i32 3 + store i8 85, i8* %add.ptr, align 1 + %add.ptr1 = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr1 to i16* + store i16 21845, i16* %0, align 2 + ret void +; CHECK-LABEL: func_06a: +; CHECK: movb $85, +; CHECK: movb $85, +; CHECK: movw $21845, +; CHECK: ret +} + + +; // 2*16 + 1*8 gives one byte gain +; void func_06b(char* x) { +; *(x) = 0x55; +; *((short*)x+6) = 0x5555; +; *((short*)x+9) = 0x5555; +; } +define void @func_06b(i8* nocapture %x) { +entry: + store i8 85, i8* %x, align 1 + %add.ptr = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr to i16* + store i16 21845, i16* %0, align 2 + %add.ptr1 = getelementptr inbounds i8* %x, i32 18 + %1 = bitcast i8* %add.ptr1 to i16* + store i16 21845, i16* %1, align 2 + ret void +; CHECK-LABEL: func_06b: +; CHECK: movw $21845, %[[REG:[a-z]]]x +; CHECK: movb %[[REG]]l, +; CHECK: movw %[[REG]]x, +; CHECK: movw %[[REG]]x, +; CHECK: ret +} + + +; // 2*16 + 2*8 is profitable +; void func_07(char* x) { +; *(x) = 0x55; +; *(x+3) = 0x55; +; *((short*)x+6) = 0x5555; +; *((short*)x+9) = 0x5555; +; } +define void @func_07(i8* nocapture %x) #0 { +entry: + store i8 85, i8* %x, align 1 + %add.ptr = getelementptr 
inbounds i8* %x, i32 3 + store i8 85, i8* %add.ptr, align 1 + %add.ptr1 = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr1 to i16* + store i16 21845, i16* %0, align 2 + %add.ptr2 = getelementptr inbounds i8* %x, i32 18 + %1 = bitcast i8* %add.ptr2 to i16* + store i16 21845, i16* %1, align 2 + ret void +; CHECK-LABEL: func_07: +; CHECK: movw $21845, %[[REG:[a-z]]]x +; CHECK: movb %[[REG]]l, +; CHECK: movb %[[REG]]l, +; CHECK: movw %[[REG]]x, +; CHECK: movw %[[REG]]x, +; CHECK: ret +} + +; void func_08(char* x) { +; *(x) = 0x55; +; *(x+3) = 0x55; +; *((short*)x+6) = 0x5555; +; *((int*)x+9) = 0x55555555; +; } +define void @func_08(i8* nocapture %x) { +entry: + store i8 85, i8* %x, align 1 + %add.ptr = getelementptr inbounds i8* %x, i32 3 + store i8 85, i8* %add.ptr, align 1 + %add.ptr1 = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr1 to i16* + store i16 21845, i16* %0, align 2 + %add.ptr2 = getelementptr inbounds i8* %x, i32 36 + %1 = bitcast i8* %add.ptr2 to i32* + store i32 1431655765, i32* %1, align 4 + ret void +; CHECK-LABEL: func_08: +; CHECK: movl $1431655765, %e[[REG:[a-z]]]x +; CHECK: movb %[[REG]]l, +; CHECK: movb %[[REG]]l, +; CHECK: movw %[[REG]]x, +; CHECK: movl %e[[REG]]x, +; CHECK: ret +} + +; void func_09(char* x) { +; *((int*)x+9) = 0x55555555; +; *((short*)x+6) = 0x5555; +; *(x) = 0x55; +; *(x+3) = 0x55; +; } +define void @func_09(i8* nocapture %x) { +entry: + %add.ptr = getelementptr inbounds i8* %x, i32 36 + %0 = bitcast i8* %add.ptr to i32* + store i32 1431655765, i32* %0, align 4 + %add.ptr1 = getelementptr inbounds i8* %x, i32 12 + %1 = bitcast i8* %add.ptr1 to i16* + store i16 21845, i16* %1, align 2 + store i8 85, i8* %x, align 1 + %add.ptr2 = getelementptr inbounds i8* %x, i32 3 + store i8 85, i8* %add.ptr2, align 1 + ret void +; CHECK-LABEL: func_09: +; CHECK: movl $1431655765, %e[[REG:[a-z]]]x +; CHECK: movl %e[[REG]]x, +; CHECK: movw %[[REG]]x, +; CHECK: movb %[[REG]]l, +; CHECK: movb %[[REG]]l, +; 
CHECK: ret +} + +; void func_10(char* x) { +; *((short*)x+6) = 0x5555; +; *((int*)x+9) = 0x55555555; +; *(x) = 0x55; +; *(x+3) = 0x55; +; } +define void @func_10(i8* nocapture %x) { +entry: + %add.ptr = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr to i16* + store i16 21845, i16* %0, align 2 + %add.ptr1 = getelementptr inbounds i8* %x, i32 36 + %1 = bitcast i8* %add.ptr1 to i32* + store i32 1431655765, i32* %1, align 4 + store i8 85, i8* %x, align 1 + %add.ptr2 = getelementptr inbounds i8* %x, i32 3 + store i8 85, i8* %add.ptr2, align 1 + ret void +; CHECK-LABEL: func_10: +; CHECK: movl $1431655765, %e[[REG:[a-z]]]x +; CHECK: movw %[[REG]]x, +; CHECK: movl %e[[REG]]x, +; CHECK: movb %[[REG]]l, +; CHECK: movb %[[REG]]l, +; CHECK: ret +} + +; void func_11(char* x) { +; *((short*)x+6) = 0x5555; +; *(x) = 0x55; +; *((int*)x+9) = 0x55555555; +; *(x+3) = 0x55; +; } +define void @func_11(i8* nocapture %x) { +entry: + %add.ptr = getelementptr inbounds i8* %x, i32 12 + %0 = bitcast i8* %add.ptr to i16* + store i16 21845, i16* %0, align 2 + store i8 85, i8* %x, align 1 + %add.ptr1 = getelementptr inbounds i8* %x, i32 36 + %1 = bitcast i8* %add.ptr1 to i32* + store i32 1431655765, i32* %1, align 4 + %add.ptr2 = getelementptr inbounds i8* %x, i32 3 + store i8 85, i8* %add.ptr2, align 1 + ret void +; CHECK-LABEL: func_11: +; CHECK: movl $1431655765, %e[[REG:[a-z]]]x +; CHECK: movw %[[REG]]x, +; CHECK: movb %[[REG]]l, +; CHECK: movl %e[[REG]]x, +; CHECK: movb %[[REG]]l, +; CHECK: ret +} + +; void func_12(char* x) { +; *((int*)x) = 0x55555555; +; *((int*)x+2) = 0x55555555; +; *((int*)x+5) = 0x33333333; +; *((int*)x+7) = 0x33333333; +; } +define void @func_12(i8* nocapture %x) { +entry: + %0 = bitcast i8* %x to i32* + store i32 1431655765, i32* %0, align 4 + %add.ptr = getelementptr inbounds i8* %x, i32 8 + %1 = bitcast i8* %add.ptr to i32* + store i32 1431655765, i32* %1, align 4 + %add.ptr1 = getelementptr inbounds i8* %x, i32 20 + %2 = bitcast i8* %add.ptr1 
to i32* + store i32 858993459, i32* %2, align 4 + %add.ptr2 = getelementptr inbounds i8* %x, i32 28 + %3 = bitcast i8* %add.ptr2 to i32* + store i32 858993459, i32* %3, align 4 + ret void +; CHECK-LABEL: func_12: +; CHECK: movl $1431655765, %e[[REG1:[a-z]]]x +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl $858993459, %e[[REG2:[a-z]]]x +; CHECK: movl %e[[REG2]]x, +; CHECK: movl %e[[REG2]]x, +; CHECK: ret +} + +; void func_13(char* x) { +; *((int*)x) = 0x55555555; +; *((int*)x+2) = 0x55555555; +; *((int*)x+5) = 0x33333333; +; *((int*)x+7) = 0x33333333; +; *((int*)x+9) = 0x11111111; +; *((int*)x+11) = 0x11111111; +; *((int*)x+13) = 0x55555555; +; } +define void @func_13(i8* nocapture %x) { +entry: + %0 = bitcast i8* %x to i32* + store i32 1431655765, i32* %0, align 4 + %add.ptr = getelementptr inbounds i8* %x, i32 8 + %1 = bitcast i8* %add.ptr to i32* + store i32 1431655765, i32* %1, align 4 + %add.ptr1 = getelementptr inbounds i8* %x, i32 20 + %2 = bitcast i8* %add.ptr1 to i32* + store i32 858993459, i32* %2, align 4 + %add.ptr2 = getelementptr inbounds i8* %x, i32 28 + %3 = bitcast i8* %add.ptr2 to i32* + store i32 858993459, i32* %3, align 4 + %add.ptr3 = getelementptr inbounds i8* %x, i32 36 + %4 = bitcast i8* %add.ptr3 to i32* + store i32 286331153, i32* %4, align 4 + %add.ptr4 = getelementptr inbounds i8* %x, i32 44 + %5 = bitcast i8* %add.ptr4 to i32* + store i32 286331153, i32* %5, align 4 + %add.ptr5 = getelementptr inbounds i8* %x, i32 52 + %6 = bitcast i8* %add.ptr5 to i32* + store i32 1431655765, i32* %6, align 4 + ret void +; CHECK-LABEL: func_13: +; CHECK: movl $1431655765, %e[[REG1:[a-z]]]x +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl $858993459, %e[[REG2:[a-z]]]x +; CHECK: movl %e[[REG2]]x, +; CHECK: movl %e[[REG2]]x, +; CHECK: movl $286331153, %e[[REG3:[a-z]]]x +; CHECK: movl %e[[REG3]]x, +; CHECK: movl %e[[REG3]]x, +; CHECK: movl $1431655765, +; CHECK: ret +} + +;void func_14(char* x) { +; *((int*)x) = 
0x55555555; +; *((int*)x+2) = 0x55555555; +; *((int*)x+3) = 0x55555555; +; *((int*)x+15) = 0x55555555; +; *((int*)x+13) = 0x55555555; +; *((int*)x+6) = 0x55555555; +; *((int*)x+7) = 0x55555555; +; *((int*)x+9) = 0x55555555; +; *((int*)x+11) = 0x55555555; +; *((int*)x+12) = 0x55555555; +;} +define void @func_14(i8* nocapture %x) { +entry: + %0 = bitcast i8* %x to i32* + store i32 1431655765, i32* %0, align 4 + %add.ptr = getelementptr inbounds i8* %x, i32 8 + %1 = bitcast i8* %add.ptr to i32* + store i32 1431655765, i32* %1, align 4 + %add.ptr1 = getelementptr inbounds i8* %x, i32 12 + %2 = bitcast i8* %add.ptr1 to i32* + store i32 1431655765, i32* %2, align 4 + %add.ptr2 = getelementptr inbounds i8* %x, i32 60 + %3 = bitcast i8* %add.ptr2 to i32* + store i32 1431655765, i32* %3, align 4 + %add.ptr3 = getelementptr inbounds i8* %x, i32 52 + %4 = bitcast i8* %add.ptr3 to i32* + store i32 1431655765, i32* %4, align 4 + %add.ptr4 = getelementptr inbounds i8* %x, i32 24 + %5 = bitcast i8* %add.ptr4 to i32* + store i32 1431655765, i32* %5, align 4 + %add.ptr5 = getelementptr inbounds i8* %x, i32 28 + %6 = bitcast i8* %add.ptr5 to i32* + store i32 1431655765, i32* %6, align 4 + %add.ptr6 = getelementptr inbounds i8* %x, i32 36 + %7 = bitcast i8* %add.ptr6 to i32* + store i32 1431655765, i32* %7, align 4 + %add.ptr7 = getelementptr inbounds i8* %x, i32 44 + %8 = bitcast i8* %add.ptr7 to i32* + store i32 1431655765, i32* %8, align 4 + %add.ptr8 = getelementptr inbounds i8* %x, i32 48 + %9 = bitcast i8* %add.ptr8 to i32* + store i32 1431655765, i32* %9, align 4 + ret void +; CHECK-LABEL: func_14: +; CHECK: movl $1431655765, %e[[REG1:[a-z]]]x +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: movl %e[[REG1]]x, +; CHECK: ret +} Index: test/CodeGen/X86/memcpy-2.ll 
=================================================================== --- test/CodeGen/X86/memcpy-2.ll +++ test/CodeGen/X86/memcpy-2.ll @@ -26,16 +26,18 @@ ; SSE1-LABEL: t1: ; SSE1: movaps _.str, %xmm0 ; SSE1: movaps %xmm0 -; SSE1: movb $0, 24(%esp) -; SSE1: movl $0, 20(%esp) -; SSE1: movl $0, 16(%esp) +; SSE1: movl $0, %e[[REGISTER:[a-d]]]x +; SSE1: movb %[[REGISTER]]l, 24(%esp) +; SSE1: movl %e[[REGISTER]]x, 20(%esp) +; SSE1: movl %e[[REGISTER]]x, 16(%esp) ; NOSSE-LABEL: t1: -; NOSSE: movb $0 -; NOSSE: movl $0 -; NOSSE: movl $0 -; NOSSE: movl $0 -; NOSSE: movl $0 +; NOSSE: movl $0, %e[[REGISTER:[a-d]]]x +; NOSSE: movb %[[REGISTER]]l, +; NOSSE: movl %e[[REGISTER]]x, +; NOSSE: movl %e[[REGISTER]]x, +; NOSSE: movl %e[[REGISTER]]x, +; NOSSE: movl %e[[REGISTER]]x, ; NOSSE: movl $101 ; NOSSE: movl $1734438249 @@ -141,43 +143,46 @@ entry: ; SSE2-Darwin-LABEL: t4: ; SSE2-Darwin: movw $120 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 +; SSE2-Darwin: movl $2021161080, [[REGISTER:%e[a-d]x]] +; SSE2-Darwin: movl [[REGISTER]], +; SSE2-Darwin: movl [[REGISTER]], +; SSE2-Darwin: movl [[REGISTER]], +; SSE2-Darwin: movl [[REGISTER]], +; SSE2-Darwin: movl [[REGISTER]], +; SSE2-Darwin: movl [[REGISTER]], +; SSE2-Darwin: movl [[REGISTER]], ; SSE2-Mingw32-LABEL: t4: ; SSE2-Mingw32: movw $120 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 +; SSE2-Mingw32: movl $2021161080, [[REGISTER:%e[a-d]x]] +; SSE2-Mingw32: movl [[REGISTER]], +; SSE2-Mingw32: movl [[REGISTER]], +; SSE2-Mingw32: movl [[REGISTER]], +; SSE2-Mingw32: movl [[REGISTER]], +; SSE2-Mingw32: movl [[REGISTER]], +; SSE2-Mingw32: movl [[REGISTER]], +; 
SSE2-Mingw32: movl [[REGISTER]], ; SSE1-LABEL: t4: ; SSE1: movw $120 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 +; SSE1: movl $2021161080, [[REGISTER:%e[a-d]x]] +; SSE1: movl [[REGISTER]], +; SSE1: movl [[REGISTER]], +; SSE1: movl [[REGISTER]], +; SSE1: movl [[REGISTER]], +; SSE1: movl [[REGISTER]], +; SSE1: movl [[REGISTER]], +; SSE1: movl [[REGISTER]], ; NOSSE-LABEL: t4: ; NOSSE: movw $120 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 +; NOSSE: movl $2021161080, [[REGISTER:%e[a-d]x]] +; NOSSE: movl [[REGISTER]], +; NOSSE: movl [[REGISTER]], +; NOSSE: movl [[REGISTER]], +; NOSSE: movl [[REGISTER]], +; NOSSE: movl [[REGISTER]], +; NOSSE: movl [[REGISTER]], ; X86-64-LABEL: t4: ; X86-64: movabsq $8680820740569200760, %rax Index: test/CodeGen/X86/memset.ll =================================================================== --- test/CodeGen/X86/memset.ll +++ test/CodeGen/X86/memset.ll @@ -11,15 +11,15 @@ %tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; [#uses=1] call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false) -; X86: movl $0, -; X86: movl $0, -; X86: movl $0, -; X86: movl $0, -; X86: movl $0, -; X86: movl $0, -; X86: movl $0, -; X86: movl $0, -; X86-NOT: movl $0, +; X86: movl $0, [[REGISTER:%[a-z]+]] +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], +; X86: movl [[REGISTER]], ; X86: ret ; XMM: xorps %xmm{{[0-9]+}}, [[Z:%xmm[0-9]+]] @@ -45,11 +45,12 @@ ; Ensure that alignment of '0' in an @llvm.memset intrinsic results in ; unaligned loads and stores. 
; XMM: PR15348 -; XMM: movb $0, -; XMM: movl $0, -; XMM: movl $0, -; XMM: movl $0, -; XMM: movl $0, +; XMM: movl $0, %e[[REGISTER:[a-d]]]x +; XMM: movb %[[REGISTER]]l, +; XMM: movl %e[[REGISTER]]x, +; XMM: movl %e[[REGISTER]]x, +; XMM: movl %e[[REGISTER]]x, +; XMM: movl %e[[REGISTER]]x, call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 17, i32 0, i1 false) ret void } Index: test/CodeGen/X86/memset64-on-x86-32.ll =================================================================== --- test/CodeGen/X86/memset64-on-x86-32.ll +++ test/CodeGen/X86/memset64-on-x86-32.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20 -; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core2 | grep movl | count 20 +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 21 +; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core2 | grep movl | count 21 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10 define void @bork() nounwind { Index: test/CodeGen/X86/nancvt.ll =================================================================== --- test/CodeGen/X86/nancvt.ll +++ test/CodeGen/X86/nancvt.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -std-compile-opts | llc > %t -; RUN: grep 2147027116 %t | count 3 -; RUN: grep 2147228864 %t | count 3 -; RUN: grep 2146502828 %t | count 3 -; RUN: grep 2143034560 %t | count 3 +; RUN: grep 2147027116 %t | count 1 +; RUN: grep 2147228864 %t | count 1 +; RUN: grep 2146502828 %t | count 1 +; RUN: grep 2143034560 %t | count 1 ; Compile time conversions of NaNs. 
; ModuleID = 'nan2.c' target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" Index: test/CodeGen/X86/pr14562.ll =================================================================== --- test/CodeGen/X86/pr14562.ll +++ test/CodeGen/X86/pr14562.ll @@ -8,8 +8,9 @@ %t = trunc i64 %s to i32 %z = zext i32 %t to i64 store i64 %z, i64* @temp1, align 8 -; CHECK: movl $0, {{_?}}temp1+4 -; CHECK: movl $0, {{_?}}temp1 +; CHECK: movl $0, [[REGISTER:%e[a-d]x]] +; CHECK: movl [[REGISTER]], {{_?}}temp1+4 +; CHECK: movl [[REGISTER]], {{_?}}temp1 ret void } Index: test/CodeGen/X86/pr18023.ll =================================================================== --- test/CodeGen/X86/pr18023.ll +++ test/CodeGen/X86/pr18023.ll @@ -3,9 +3,10 @@ ; CHECK: movabsq $4294967296, %rcx ; CHECK: movq %rcx, (%rax) -; CHECK: movl $1, 4(%rax) +; CHECK: movl $1, %r[[REG:[a-z]]]x +; CHECK: movl %e[[REG]]x, 4(%rax) ; CHECK: movl $0, 4(%rax) -; CHECK: movq $1, 4(%rax) +; CHECK: movq %r[[REG]]x, 4(%rax) @c = common global i32 0, align 4 @a = common global [3 x i32] zeroinitializer, align 4 Index: test/CodeGen/X86/tlv-1.ll =================================================================== --- test/CodeGen/X86/tlv-1.ll +++ test/CodeGen/X86/tlv-1.ll @@ -11,8 +11,9 @@ unreachable ; CHECK: movq _c@TLVP(%rip), %rdi ; CHECK-NEXT: callq *(%rdi) - ; CHECK-NEXT: movl $0, 56(%rax) - ; CHECK-NEXT: movq $0, 48(%rax) + ; CHECK-NEXT: movl $0, %r[[REGISTER:[a-z]+]] + ; CHECK-NEXT: movl %e[[REGISTER]], 56(%rax) + ; CHECK-NEXT: movq %r[[REGISTER]], 48(%rax) } ; rdar://10291355 Index: test/CodeGen/X86/xmulo.ll =================================================================== --- test/CodeGen/X86/xmulo.ll +++ test/CodeGen/X86/xmulo.ll @@ -9,8 +9,9 @@ define i32 @t1() nounwind { ; CHECK-LABEL: t1: -; CHECK: movl $0, 12(%esp) -; CHECK: movl $0, 8(%esp) +; CHECK: movl $0, [[REGISTER:%[a-z]+]] +; CHECK: movl [[REGISTER]], 12(%esp) +; CHECK: 
movl [[REGISTER]], 8(%esp) ; CHECK: movl $72, 4(%esp) %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 8) @@ -23,9 +24,10 @@ define i32 @t2() nounwind { ; CHECK-LABEL: t2: -; CHECK: movl $0, 12(%esp) -; CHECK: movl $0, 8(%esp) -; CHECK: movl $0, 4(%esp) +; CHECK: movl $0, [[REGISTER:%[a-z]+]] +; CHECK: movl [[REGISTER]], 12(%esp) +; CHECK: movl [[REGISTER]], 8(%esp) +; CHECK: movl [[REGISTER]], 4(%esp) %1 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 9, i64 0) %2 = extractvalue {i64, i1} %1, 0 Index: test/DebugInfo/X86/debug-loc-offset.ll =================================================================== --- test/DebugInfo/X86/debug-loc-offset.ll +++ test/DebugInfo/X86/debug-loc-offset.ll @@ -55,7 +55,7 @@ ; CHECK: .debug_loc contents: ; CHECK: 0x00000000: Beginning address offset: 0x0000000000000000 -; CHECK: Ending address offset: 0x000000000000001a +; CHECK: Ending address offset: 0x0000000000000017 %struct.A = type { i32 (...)**, i32 }