diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -86,6 +86,95 @@
   virtual std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI,
                                        unsigned Reg,
                                        const APInt &Value) const = 0;
 
+  // Generates the code for the lower munmap call. The code generated by this
+  // function may clobber registers.
+  virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "generateLowerMunmap is not implemented on the current architecture");
+  }
+
+  // Generates the upper munmap call. The code generated by this function may
+  // clobber registers.
+  virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "generateUpperMunmap is not implemented on the current architecture");
+  }
+
+  // Generates the code for an exit syscall. The code generated by this
+  // function may clobber registers.
+  virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const {
+    report_fatal_error(
+        "generateExitSyscall is not implemented on the current architecture");
+  }
+
+  // Generates the code to mmap a region of memory. The code generated by this
+  // function may clobber registers.
+  virtual std::vector<MCInst>
+  generateMmap(intptr_t Address, size_t Length,
+               intptr_t FileDescriptorAddress) const {
+    report_fatal_error(
+        "generateMmap is not implemented on the current architecture");
+  }
+
+  // Generates the mmap code for the auxiliary memory. The code generated by
+  // this function may clobber registers.
+  virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "generateMmapAuxMem is not implemented on the current architecture");
+  }
+
+  // Moves argument registers into other registers that won't get clobbered
+  // while making syscalls. The code generated by this function may clobber
+  // registers.
+  virtual void
+  moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "moveArgumentRegisters is not implemented on the current "
+        "architecture");
+  }
+
+  // Generates code to move argument registers, unmap memory above and below
+  // the snippet, and map the auxiliary memory into the subprocess. The code
+  // generated by this function may clobber registers.
+  virtual std::vector<MCInst> generateMemoryInitialSetup() const {
+    report_fatal_error(
+        "generateMemoryInitialSetup is not implemented on the current "
+        "architecture");
+  }
+
+  // Sets the stack register to point to the auxiliary memory so that
+  // operations requiring the stack can be performed (e.g., setting large
+  // registers). The code generated by this function may clobber registers.
+  virtual std::vector<MCInst> setStackRegisterToAuxMem() const {
+    report_fatal_error(
+        "setStackRegisterToAuxMem is not implemented on the current "
+        "architecture");
+  }
+
+  virtual intptr_t getAuxiliaryMemoryStartAddress() const {
+    report_fatal_error(
+        "getAuxiliaryMemoryStartAddress is not implemented on the current "
+        "architecture");
+  }
+
+  // Generates the necessary ioctl system calls to configure the perf
+  // counters. The code generated by this function preserves all registers if
+  // the parameter SaveRegisters is set to true.
+  virtual std::vector<MCInst> configurePerfCounter(long Request,
+                                                   bool SaveRegisters) const {
+    report_fatal_error(
+        "configurePerfCounter is not implemented on the current architecture");
+  }
+
+  // Gets the ABI-dependent registers that are used to pass arguments in a
+  // function call.
+  virtual std::vector<unsigned> getArgumentRegisters() const {
+    report_fatal_error(
+        "getArgumentRegisters is not implemented on the current architecture");
+  }
+
+  // Gets the registers that might potentially need to be saved while the
+  // setup in the test harness executes.
+  virtual std::vector<unsigned> getRegistersNeedSaving() const {
+    report_fatal_error(
+        "getRegistersNeedSaving is not implemented on the current "
+        "architecture");
+  }
+
   // Returns the register pointing to scratch memory, or 0 if this target
   // does not support memory operands. The benchmark function uses the
   // default calling convention.
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -11,6 +11,7 @@
 #include "../ParallelSnippetGenerator.h"
 #include "../SerialSnippetGenerator.h"
 #include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86.h"
@@ -36,9 +37,17 @@
 #include <float.h> // For _clearfp in ~X86SavedState().
 #endif
 
+#ifdef __linux__
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
 namespace llvm {
 namespace exegesis {
 
+static constexpr const intptr_t VAddressSpaceCeiling = 0x0000800000000000;
+
 // If a positive value is specified, we are going to use the LBR in
 // latency-mode.
 //
@@ -686,6 +695,8 @@
     return ExegesisTarget::createCounter(CounterName, State, ProcessID);
   }
 
+  enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
+
 private:
   void addTargetSpecificPasses(PassManagerBase &PM) const override;
 
@@ -709,6 +720,34 @@
   std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                                const APInt &Value) const override;
 
+#ifdef __linux__
+  void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
+
+  void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override;
+
+  std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
+
+  std::vector<MCInst>
+  generateMmap(intptr_t Address, size_t Length,
+               intptr_t FileDescriptorAddress) const override;
+
+  void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
+
+  void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override;
+
+  std::vector<MCInst> generateMemoryInitialSetup() const override;
+
+  std::vector<MCInst> setStackRegisterToAuxMem() const override;
+
+  intptr_t getAuxiliaryMemoryStartAddress() const override;
+
+  std::vector<MCInst> configurePerfCounter(long Request,
+                                           bool SaveRegisters) const override;
+
+  std::vector<unsigned> getArgumentRegisters() const override;
+
+  std::vector<unsigned> getRegistersNeedSaving() const override;
+#endif // __linux__
+
   ArrayRef<unsigned> getUnavailableRegisters() const override {
     if (DisableUpperSSERegisters)
       return ArrayRef(kUnavailableRegistersSSE,
@@ -942,6 +981,223 @@
   return {}; // Not yet implemented.
 }
 
+#ifdef __linux__
+
+void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(
+      loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
+  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+void generateRoundToNearestPage(unsigned int Register,
+                                std::vector<MCInst> &GeneratedCode) {
+  int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
+  // Round down to the nearest page by getting rid of the least significant
+  // bits representing location in the page. Shift right to get rid of this
+  // info and then shift back left.
+  GeneratedCode.push_back(MCInstBuilder(X86::SHR64ri)
+                              .addReg(Register)
+                              .addReg(Register)
+                              .addImm(PageSizeShift));
+  GeneratedCode.push_back(MCInstBuilder(X86::SHL64ri)
+                              .addReg(Register)
+                              .addReg(Register)
+                              .addImm(PageSizeShift));
+}
+
+void generateGetInstructionPointer(unsigned int ResultRegister,
+                                   std::vector<MCInst> &GeneratedCode) {
+  // Use a load effective address to get the current instruction pointer and
+  // put it into the result register.
+  GeneratedCode.push_back(MCInstBuilder(X86::LEA64r)
+                              .addReg(ResultRegister)
+                              .addReg(X86::RIP)
+                              .addImm(1)
+                              .addReg(0)
+                              .addImm(0)
+                              .addReg(0));
+}
+
+void ExegesisX86Target::generateLowerMunmap(
+    std::vector<MCInst> &GeneratedCode) const {
+  // Unmap starting at address zero.
+  GeneratedCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, 0)));
+  // Get the current instruction pointer so we know where to unmap up to.
+  generateGetInstructionPointer(X86::RSI, GeneratedCode);
+  generateRoundToNearestPage(X86::RSI, GeneratedCode);
+  // Subtract a page from the end of the unmap so we don't unmap the
+  // currently executing section.
+  GeneratedCode.push_back(MCInstBuilder(X86::SUB64ri32)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RSI)
+                              .addImm(getpagesize()));
+  generateSyscall(SYS_munmap, GeneratedCode);
+}
+
+void ExegesisX86Target::generateUpperMunmap(
+    std::vector<MCInst> &GeneratedCode) const {
+  generateGetInstructionPointer(X86::R8, GeneratedCode);
+  // Load the size of the snippet into RDI from the argument register.
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(X86::RDI)
+                              .addReg(ArgumentRegisters::CodeSize));
+  // Add the length of the snippet (in %RDI) to the current instruction
+  // pointer (%R8) to get the address where we should start unmapping.
+  GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr)
+                              .addReg(X86::RDI)
+                              .addReg(X86::RDI)
+                              .addReg(X86::R8));
+  generateRoundToNearestPage(X86::RDI, GeneratedCode);
+  // Add one page to the start address to ensure that we're above the snippet
+  // since the above function rounds down.
+  GeneratedCode.push_back(MCInstBuilder(X86::ADD64ri32)
+                              .addReg(X86::RDI)
+                              .addReg(X86::RDI)
+                              .addImm(getpagesize()));
+  // Unmap to just one page under the ceiling of the address space.
+  GeneratedCode.push_back(loadImmediate(
+      X86::RSI, 64, APInt(64, VAddressSpaceCeiling - getpagesize())));
+  GeneratedCode.push_back(MCInstBuilder(X86::SUB64rr)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RDI));
+  generateSyscall(SYS_munmap, GeneratedCode);
+}
+
+std::vector<MCInst>
+ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const {
+  std::vector<MCInst> ExitCallCode;
+  ExitCallCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, ExitCode)));
+  generateSyscall(SYS_exit, ExitCallCode);
+  return ExitCallCode;
+}
+
+std::vector<MCInst>
+ExegesisX86Target::generateMmap(intptr_t Address, size_t Length,
+                                intptr_t FileDescriptorAddress) const {
+  std::vector<MCInst> MmapCode;
+  MmapCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, Address)));
+  MmapCode.push_back(loadImmediate(X86::RSI, 64, APInt(64, Length)));
+  MmapCode.push_back(
+      loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
+  MmapCode.push_back(
+      loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
+  // Copy the file descriptor location from aux memory into R8.
+  MmapCode.push_back(
+      loadImmediate(X86::R8, 64, APInt(64, FileDescriptorAddress)));
+  // Dereference the file descriptor into the FD argument register.
+  MmapCode.push_back(MCInstBuilder(X86::MOV32rm)
+                         .addReg(X86::R8D)
+                         .addReg(X86::R8)
+                         .addImm(1)
+                         .addReg(0)
+                         .addImm(0)
+                         .addReg(0));
+  MmapCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
+  generateSyscall(SYS_mmap, MmapCode);
+  return MmapCode;
+}
+
+void ExegesisX86Target::generateMmapAuxMem(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(
+      loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
+  GeneratedCode.push_back(loadImmediate(
+      X86::RSI, 64, APInt(64, SubprocessMemory::AuxiliaryMemorySize)));
+  GeneratedCode.push_back(
+      loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
+  GeneratedCode.push_back(
+      loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(X86::R8)
+                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD));
+  GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
+  generateSyscall(SYS_mmap, GeneratedCode);
+}
+
+void ExegesisX86Target::moveArgumentRegisters(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(ArgumentRegisters::CodeSize)
+                              .addReg(X86::RDI));
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
+                              .addReg(X86::RSI));
+}
+
+std::vector<MCInst> ExegesisX86Target::generateMemoryInitialSetup() const {
+  std::vector<MCInst> MemoryInitialSetupCode;
+  moveArgumentRegisters(MemoryInitialSetupCode);
+  generateLowerMunmap(MemoryInitialSetupCode);
+  generateUpperMunmap(MemoryInitialSetupCode);
+  generateMmapAuxMem(MemoryInitialSetupCode);
+  return MemoryInitialSetupCode;
+}
+
+std::vector<MCInst> ExegesisX86Target::setStackRegisterToAuxMem() const {
+  // Move %rsp to the end of the auxiliary memory.
+  return {MCInstBuilder(X86::MOV64ri)
+              .addReg(X86::RSP)
+              .addImm(getAuxiliaryMemoryStartAddress() +
+                      SubprocessMemory::AuxiliaryMemorySize)};
+}
+
+intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
+  // Return the second-to-last page in the virtual address space to try to
+  // prevent interference with memory annotations in the snippet.
+  return VAddressSpaceCeiling - 2 * getpagesize();
+}
+
+void generateRegisterStackPush(unsigned int Register,
+                               std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
+}
+
+void generateRegisterStackPop(unsigned int Register,
+                              std::vector<MCInst> &GeneratedCode) {
+  GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
+}
+
+std::vector<MCInst>
+ExegesisX86Target::configurePerfCounter(long Request,
+                                        bool SaveRegisters) const {
+  std::vector<MCInst> ConfigurePerfCounterCode;
+  if (SaveRegisters) {
+    // Preserve RAX, RDI, and RSI by pushing them to the stack.
+    generateRegisterStackPush(X86::RAX, ConfigurePerfCounterCode);
+    generateRegisterStackPush(X86::RDI, ConfigurePerfCounterCode);
+    generateRegisterStackPush(X86::RSI, ConfigurePerfCounterCode);
+  }
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
+  ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
+                                         .addReg(X86::EDI)
+                                         .addReg(X86::RDI)
+                                         .addImm(1)
+                                         .addReg(0)
+                                         .addImm(0)
+                                         .addReg(0));
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(X86::RSI, 64, APInt(64, Request)));
+  generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
+  if (SaveRegisters) {
+    // Restore RAX, RDI, and RSI, in reverse order.
+    generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode);
+    generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode);
+    generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode);
+  }
+  return ConfigurePerfCounterCode;
+}
+
+std::vector<unsigned> ExegesisX86Target::getArgumentRegisters() const {
+  return {X86::RDI, X86::RSI};
+}
+
+std::vector<unsigned> ExegesisX86Target::getRegistersNeedSaving() const {
+  return {X86::RAX, X86::RDI, X86::RSI};
+}
+
+#endif // __linux__
+
 // Instruction can have some variable operands, and we may want to see how
 // different operands affect performance. So for each operand position,
 // precompute all the possible choices we might care about,
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
--- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
@@ -12,6 +12,7 @@
 #include <memory>
 
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "SubprocessMemory.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "gmock/gmock.h"
@@ -19,6 +20,11 @@
 
 #include "llvm/MC/MCInstPrinter.h"
 
+#ifdef __linux__
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#endif // __linux__
+
 namespace llvm {
 
 bool operator==(const MCOperand &a, const MCOperand &b) {
@@ -79,6 +85,10 @@
   return AllOf(OpcodeIs(Opcode), ElementsAre(IsReg(Reg), IsImm(Value)));
 }
 
+Matcher<MCInst> IsMovRegToReg(unsigned Opcode, int64_t Reg1, int64_t Reg2) {
+  return AllOf(OpcodeIs(Opcode), ElementsAre(IsReg(Reg1), IsReg(Reg2)));
+}
+
 Matcher<MCInst> IsMovValueToStack(unsigned Opcode, int64_t Value,
                                   size_t Offset) {
   return AllOf(OpcodeIs(Opcode),
@@ -576,6 +586,83 @@
       State.getExegesisTarget().allowAsBackToBack(getInstr(X86::LEA64r)));
 }
 
+#ifdef __linux__
+TEST_F(X86Core2TargetTest, GenerateLowerMunmapTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().generateLowerMunmap(GeneratedCode);
+  EXPECT_THAT(GeneratedCode,
+              ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 0),
+                          OpcodeIs(X86::LEA64r), OpcodeIs(X86::SHR64ri),
+                          OpcodeIs(X86::SHL64ri), OpcodeIs(X86::SUB64ri32),
+                          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_munmap),
+                          OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, GenerateUpperMunmapTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().generateUpperMunmap(GeneratedCode);
+  EXPECT_THAT(
+      GeneratedCode,
+      ElementsAreArray({OpcodeIs(X86::LEA64r), OpcodeIs(X86::MOV64rr),
+                        OpcodeIs(X86::ADD64rr), OpcodeIs(X86::SHR64ri),
+                        OpcodeIs(X86::SHL64ri), OpcodeIs(X86::ADD64ri32),
+                        IsMovImmediate(X86::MOV64ri, X86::RSI,
+                                       0x0000800000000000 - getpagesize()),
+                        OpcodeIs(X86::SUB64rr),
+                        IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_munmap),
+                        OpcodeIs(X86::SYSCALL)}));
+}
+
+TEST_F(X86Core2TargetTest, GenerateExitSyscallTest) {
+  EXPECT_THAT(State.getExegesisTarget().generateExitSyscall(127),
+              ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 127),
+                          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_exit),
+                          OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, GenerateMmapTest) {
+  EXPECT_THAT(State.getExegesisTarget().generateMmap(0x1000, 4096, 0x2000),
+              ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 0x1000),
+                          IsMovImmediate(X86::MOV64ri, X86::RSI, 4096),
+                          IsMovImmediate(X86::MOV64ri, X86::RDX,
+                                         PROT_READ | PROT_WRITE),
+                          IsMovImmediate(X86::MOV64ri, X86::R10,
+                                         MAP_SHARED | MAP_FIXED_NOREPLACE),
+                          IsMovImmediate(X86::MOV64ri, X86::R8, 0x2000),
+                          OpcodeIs(X86::MOV32rm),
+                          IsMovImmediate(X86::MOV64ri, X86::R9, 0),
+                          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_mmap),
+                          OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, GenerateMmapAuxMemTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().generateMmapAuxMem(GeneratedCode);
+  EXPECT_THAT(
+      GeneratedCode,
+      ElementsAre(
+          IsMovImmediate(
+              X86::MOV64ri, X86::RDI,
+              State.getExegesisTarget().getAuxiliaryMemoryStartAddress()),
+          IsMovImmediate(X86::MOV64ri, X86::RSI,
+                         SubprocessMemory::AuxiliaryMemorySize),
+          IsMovImmediate(X86::MOV64ri, X86::RDX, PROT_READ | PROT_WRITE),
+          IsMovImmediate(X86::MOV64ri, X86::R10,
+                         MAP_SHARED | MAP_FIXED_NOREPLACE),
+          OpcodeIs(X86::MOV64rr), IsMovImmediate(X86::MOV64ri, X86::R9, 0),
+          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_mmap),
+          OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, MoveArgumentRegistersTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().moveArgumentRegisters(GeneratedCode);
+  EXPECT_THAT(GeneratedCode,
+              ElementsAre(IsMovRegToReg(X86::MOV64rr, X86::R12, X86::RDI),
+                          IsMovRegToReg(X86::MOV64rr, X86::R13, X86::RSI)));
+}
+#endif // __linux__
+
 } // namespace
 } // namespace exegesis
 } // namespace llvm
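
---

For reviewers unfamiliar with the emitted sequences: the following is a rough C++ sketch of what the machine code produced by generateMemoryInitialSetup() does at runtime in the subprocess. It is illustrative only, not part of the patch; the function name runMemorySetup and its parameters are hypothetical stand-ins for values the real harness receives in RDI/RSI and stashes in R12/R13 (see ArgumentRegisters), and the real code performs these steps as raw SYSCALL instructions rather than libc calls.

```cpp
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <cstddef>
#include <cstdint>

// Mirrors the constant added to X86/Target.cpp: the top of the canonical
// lower half of the x86-64 user-space virtual address range.
static constexpr intptr_t VAddressSpaceCeiling = 0x0000800000000000;

// Hypothetical sketch of the effect of the generated setup code, assuming
// SnippetStart/SnippetSize describe the executing snippet and AuxMemFD is
// the shared-memory file descriptor passed in by the parent process.
void runMemorySetup(intptr_t SnippetStart, size_t SnippetSize, int AuxMemFD,
                    size_t AuxMemSize) {
  const intptr_t PageSize = getpagesize();
  // generateRoundToNearestPage rounds down with a SHR/SHL pair; the C++
  // equivalent is masking off the in-page bits.
  const intptr_t SnippetPage = SnippetStart & ~(PageSize - 1);
  // Lower munmap: everything from address zero up to one page below the
  // currently executing snippet.
  syscall(SYS_munmap, 0, SnippetPage - PageSize);
  // Upper munmap: from one page above the snippet up to one page below the
  // address-space ceiling.
  const intptr_t UpperStart =
      ((SnippetStart + SnippetSize) & ~(PageSize - 1)) + PageSize;
  syscall(SYS_munmap, UpperStart,
          (VAddressSpaceCeiling - PageSize) - UpperStart);
  // Map the auxiliary memory at a fixed address in the second-to-last page,
  // mirroring generateMmapAuxMem and getAuxiliaryMemoryStartAddress.
  syscall(SYS_mmap, VAddressSpaceCeiling - 2 * PageSize, AuxMemSize,
          PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED_NOREPLACE, AuxMemFD,
          0);
}
```

This also shows why the generated code may clobber registers freely: after the two munmap calls, only the snippet's pages and the fixed-address auxiliary mapping remain, so any later mmap of an annotated region (generateMmap) can rely on MAP_FIXED_NOREPLACE succeeding at its requested address.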