diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -86,6 +86,83 @@
   virtual std::vector setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                                const APInt &Value) const = 0;
 
+  // Generates the code for the lower munmap call and appends it to
+  // GeneratedCode.
+  virtual void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "generateLowerMunmap is not implemented on the current architecture");
+  }
+
+  // Generates the code for the upper munmap call and appends it to
+  // GeneratedCode.
+  virtual void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "generateUpperMunmap is not implemented on the current architecture");
+  }
+
+  // Generates the code for an exit syscall that exits with ExitCode.
+  virtual std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const {
+    report_fatal_error(
+        "generateExitSyscall is not implemented on the current architecture");
+  }
+
+  // Generates the code to mmap a region of Length bytes at Address.
+  // FileDescriptorAddress is the address from which the generated code reads
+  // the file descriptor to map.
+  virtual std::vector<MCInst>
+  generateMmap(intptr_t Address, size_t Length,
+               intptr_t FileDescriptorAddress) const {
+    report_fatal_error(
+        "generateMmap is not implemented on the current architecture");
+  }
+
+  // Generates the mmap code for the aux memory and appends it to
+  // GeneratedCode.
+  virtual void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error(
+        "generateMmapAuxMem is not implemented on the current architecture");
+  }
+
+  // Moves argument registers into other registers that won't get clobbered
+  // while making syscalls
+  virtual void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const {
+    report_fatal_error("moveArgumentRegisters is not implemented on the "
+                       "current architecture");
+  }
+
+  // Generates code to move argument registers, unmap memory above and below the
+  // snippet, and map the auxiliary memory into the subprocess.
+  virtual std::vector<MCInst> generateMemoryInitialSetup() const {
+    report_fatal_error("generateMemoryInitialSetup is not implemented on the "
+                       "current architecture");
+  }
+
+  // Sets the stack register to the auxiliary memory so that operations
+  // requiring the stack can be formed (e.g., setting large registers).
+  virtual std::vector<MCInst> setStackRegisterToAuxMem() const {
+    report_fatal_error("setStackRegisterToAuxMem is not implemented on the "
+                       "current architecture");
+  }
+
+  // Returns the address at which the auxiliary memory is mapped.
+  virtual intptr_t getAuxiliaryMemoryStartAddress() const {
+    report_fatal_error("getAuxiliaryMemoryStartAddress is not implemented on "
+                       "the current architecture");
+  }
+
+  // Generates the necessary ioctl system calls to configure the perf counters
+  virtual std::vector<MCInst> configurePerfCounter(long Request) const {
+    report_fatal_error(
+        "configurePerfCounter is not implemented on the current architecture");
+  }
+
+  // Gets the ABI dependent registers that are used to pass arguments in a
+  // function call
+  virtual std::vector<unsigned> getArgumentRegisters() const {
+    report_fatal_error(
+        "getArgumentRegisters is not implemented on the current architecture");
+  }
+
   // Returns the register pointing to scratch memory, or 0 if this target
   // does not support memory operands. The benchmark function uses the
   // default calling convention.
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -11,6 +11,7 @@
 #include "../ParallelSnippetGenerator.h"
 #include "../SerialSnippetGenerator.h"
 #include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86.h"
@@ -36,6 +37,12 @@
 #include // For _clearfp in ~X86SavedState().
#endif
 
+#ifdef __linux__
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#endif
+
 namespace llvm {
 namespace exegesis {
 
@@ -686,6 +693,14 @@
     return ExegesisTarget::createCounter(CounterName, State, ProcessID);
   }
 
+  // Registers into which moveArgumentRegisters() copies the incoming argument
+  // registers so that the values survive the setup syscalls.
+  // NOTE(review): generateUpperMunmap() adds AuxiliaryMemoryFD to an address
+  // and generateMmapAuxMem() passes CodeSize as the mmap file descriptor, so
+  // the two names look swapped relative to how the values are used -- confirm
+  // against the caller.
+  enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };
+
 private:
   void addTargetSpecificPasses(PassManagerBase &PM) const override;
 
@@ -709,6 +724,32 @@
   std::vector setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                        const APInt &Value) const override;
 
+#ifdef __linux__
+  void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;
+
+  void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override;
+
+  std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;
+
+  std::vector<MCInst>
+  generateMmap(intptr_t Address, size_t Length,
+               intptr_t FileDescriptorAddress) const override;
+
+  void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;
+
+  void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override;
+
+  std::vector<MCInst> generateMemoryInitialSetup() const override;
+
+  std::vector<MCInst> setStackRegisterToAuxMem() const override;
+
+  intptr_t getAuxiliaryMemoryStartAddress() const override;
+
+  std::vector<MCInst> configurePerfCounter(long Request) const override;
+
+  std::vector<unsigned> getArgumentRegisters() const override;
+#endif // __linux__
+
   ArrayRef getUnavailableRegisters() const override {
     if (DisableUpperSSERegisters)
       return ArrayRef(kUnavailableRegistersSSE,
@@ -942,6 +983,202 @@
   return {}; // Not yet implemented.
 }
 
+#ifdef __linux__
+// Appends code that calls munmap(0, Len), where Len is the page-aligned
+// address of the running code minus one page, i.e. it unmaps everything below
+// the page that precedes the currently executing one.
+void ExegesisX86Target::generateLowerMunmap(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, 0)));
+  // Get the current function pointer so we know where to unmap up to.
+  GeneratedCode.push_back(MCInstBuilder(X86::LEA64r)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RIP)
+                              .addImm(1)
+                              .addReg(0)
+                              .addImm(0)
+                              .addReg(0));
+  // The below two instructions round RSI down to a 4096 byte page boundary.
+  GeneratedCode.push_back(
+      MCInstBuilder(X86::SHR64ri).addReg(X86::RSI).addReg(X86::RSI).addImm(12));
+  GeneratedCode.push_back(
+      MCInstBuilder(X86::SHL64ri).addReg(X86::RSI).addReg(X86::RSI).addImm(12));
+  // Subtract a page from the end of the unmap so we don't unmap the currently
+  // executing section.
+  GeneratedCode.push_back(MCInstBuilder(X86::SUB64ri32)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RSI)
+                              .addImm(getpagesize()));
+  GeneratedCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_munmap)));
+  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+// Appends code that munmaps the range starting one page above the page-aligned
+// sum of the current code address and ArgumentRegisters::AuxiliaryMemoryFD and
+// ending one page below 0x0000800000000000.
+void ExegesisX86Target::generateUpperMunmap(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(MCInstBuilder(X86::LEA64r)
+                              .addReg(X86::R8)
+                              .addReg(X86::RIP)
+                              .addImm(1)
+                              .addReg(0)
+                              .addImm(0)
+                              .addReg(0));
+  // Load RDI from the argument register.
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(X86::RDI)
+                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD));
+  GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr)
+                              .addReg(X86::RDI)
+                              .addReg(X86::RDI)
+                              .addReg(X86::R8));
+  GeneratedCode.push_back(
+      MCInstBuilder(X86::SHR64ri).addReg(X86::RDI).addReg(X86::RDI).addImm(12));
+  GeneratedCode.push_back(
+      MCInstBuilder(X86::SHL64ri).addReg(X86::RDI).addReg(X86::RDI).addImm(12));
+  GeneratedCode.push_back(MCInstBuilder(X86::ADD64ri32)
+                              .addReg(X86::RDI)
+                              .addReg(X86::RDI)
+                              .addImm(getpagesize()));
+  // Unmap to just one page under the ceiling of the address space.
+  GeneratedCode.push_back(loadImmediate(
+      X86::RSI, 64, APInt(64, 0x0000800000000000 - getpagesize())));
+  GeneratedCode.push_back(MCInstBuilder(X86::SUB64rr)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RSI)
+                              .addReg(X86::RDI));
+  GeneratedCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_munmap)));
+  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+// Returns code that calls exit(ExitCode).
+std::vector<MCInst>
+ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const {
+  std::vector<MCInst> ExitCallCode;
+  ExitCallCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, ExitCode)));
+  ExitCallCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_exit)));
+  ExitCallCode.push_back(MCInstBuilder(X86::SYSCALL));
+  return ExitCallCode;
+}
+
+// Returns code that calls mmap(Address, Length, PROT_READ | PROT_WRITE,
+// MAP_SHARED | MAP_FIXED_NOREPLACE, FD, 0), where FD is the 32-bit value that
+// the generated code loads from FileDescriptorAddress.
+std::vector<MCInst>
+ExegesisX86Target::generateMmap(intptr_t Address, size_t Length,
+                                intptr_t FileDescriptorAddress) const {
+  std::vector<MCInst> MmapCode;
+  MmapCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_mmap)));
+  MmapCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, Address)));
+  MmapCode.push_back(loadImmediate(X86::RSI, 64, APInt(64, Length)));
+  MmapCode.push_back(
+      loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
+  MmapCode.push_back(
+      loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
+  // Copy file descriptor location from aux memory into R8
+  MmapCode.push_back(
+      loadImmediate(X86::R8, 64, APInt(64, FileDescriptorAddress)));
+  // Dereference file descriptor into FD argument register
+  MmapCode.push_back(MCInstBuilder(X86::MOV32rm)
+                         .addReg(X86::R8D)
+                         .addReg(X86::R8)
+                         .addImm(1)
+                         .addReg(0)
+                         .addImm(0)
+                         .addReg(0));
+  MmapCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
+  MmapCode.push_back(MCInstBuilder(X86::SYSCALL));
+  return MmapCode;
+}
+
+// Appends code that mmaps AuxiliaryMemorySize bytes at
+// getAuxiliaryMemoryStartAddress(), using the value held in
+// ArgumentRegisters::CodeSize as the file descriptor argument.
+void ExegesisX86Target::generateMmapAuxMem(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_mmap)));
+  GeneratedCode.push_back(
+      loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
+  GeneratedCode.push_back(
+      loadImmediate(X86::RSI, 64, APInt(64, AuxiliaryMemorySize)));
+  GeneratedCode.push_back(
+      loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
+  GeneratedCode.push_back(
+      loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(X86::R8)
+                              .addReg(ArgumentRegisters::CodeSize));
+  GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
+  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
+}
+
+// Appends the copies RSI -> CodeSize (R12) and RDI -> AuxiliaryMemoryFD (R13).
+void ExegesisX86Target::moveArgumentRegisters(
+    std::vector<MCInst> &GeneratedCode) const {
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(ArgumentRegisters::CodeSize)
+                              .addReg(X86::RSI));
+  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
+                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
+                              .addReg(X86::RDI));
+}
+
+// Returns, in order: the argument register moves, the lower munmap, the upper
+// munmap and the auxiliary memory mmap.
+std::vector<MCInst> ExegesisX86Target::generateMemoryInitialSetup() const {
+  std::vector<MCInst> MemoryInitialSetupCode;
+  moveArgumentRegisters(MemoryInitialSetupCode);
+  generateLowerMunmap(MemoryInitialSetupCode);
+  generateUpperMunmap(MemoryInitialSetupCode);
+  generateMmapAuxMem(MemoryInitialSetupCode);
+  return MemoryInitialSetupCode;
+}
+
+std::vector<MCInst> ExegesisX86Target::setStackRegisterToAuxMem() const {
+  // Moves %rsp to the end of the auxiliary memory
+  return {MCInstBuilder(X86::MOV64ri)
+              .addReg(X86::RSP)
+              .addImm(getAuxiliaryMemoryStartAddress() + AuxiliaryMemorySize)};
+}
+
+intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
+  // Return the second to last page in the virtual address space to try and
+  // prevent interference with memory annotations in the snippet
+  return 0x00007fffffffe000;
+}
+
+// Returns code that calls ioctl(FD, Request), where FD is the 32-bit value that
+// the generated code loads from the start of the auxiliary memory. RDX (the
+// third ioctl argument) is not set by this sequence.
+std::vector<MCInst>
+ExegesisX86Target::configurePerfCounter(long Request) const {
+  // TODO(boomanaiden154): This currently will clobber RAX, RDI, and RSI.
+  // Fix this, probably by pushing data to the stack
+  std::vector<MCInst> ConfigurePerfCounterCode;
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(X86::RAX, 64, APInt(64, SYS_ioctl)));
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
+  ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
+                                         .addReg(X86::EDI)
+                                         .addReg(X86::RDI)
+                                         .addImm(1)
+                                         .addReg(0)
+                                         .addImm(0)
+                                         .addReg(0));
+  ConfigurePerfCounterCode.push_back(
+      loadImmediate(X86::RSI, 64, APInt(64, Request)));
+  ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::SYSCALL));
+  return ConfigurePerfCounterCode;
+}
+
+// Returns the registers that moveArgumentRegisters() reads: RDI and RSI.
+std::vector<unsigned> ExegesisX86Target::getArgumentRegisters() const {
+  return {X86::RDI, X86::RSI};
+}
+#endif // __linux__
+
 // Instruction can have some variable operands, and we may want to see how
 // different operands affect performance. So for each operand position,
 // precompute all the possible choices we might care about,
diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
--- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
+++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp
@@ -12,6 +12,7 @@
 #include
 
 #include "MCTargetDesc/X86MCTargetDesc.h"
+#include "SubprocessMemory.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "gmock/gmock.h"
@@ -19,6 +20,11 @@
 
 #include "llvm/MC/MCInstPrinter.h"
 
+#ifdef __linux__
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#endif // __linux__
+
 namespace llvm {
 
 bool operator==(const MCOperand &a, const MCOperand &b) {
@@ -79,6 +85,12 @@
   return AllOf(OpcodeIs(Opcode), ElementsAre(IsReg(Reg), IsImm(Value)));
 }
 
+// Matches an instruction with opcode Opcode whose operands are, in order, the
+// registers Reg1 and Reg2.
+Matcher<MCInst> IsMovRegToReg(unsigned Opcode, int64_t Reg1, int64_t Reg2) {
+  return AllOf(OpcodeIs(Opcode), ElementsAre(IsReg(Reg1), IsReg(Reg2)));
+}
+
 Matcher IsMovValueToStack(unsigned Opcode, int64_t Value,
                           size_t Offset) {
   return AllOf(OpcodeIs(Opcode),
@@ -576,6 +588,81 @@
State.getExegesisTarget().allowAsBackToBack(getInstr(X86::LEA64r)));
 }
 
+#ifdef __linux__
+TEST_F(X86Core2TargetTest, GenerateLowerMunmapTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().generateLowerMunmap(GeneratedCode);
+  EXPECT_THAT(GeneratedCode,
+              ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 0),
+                          OpcodeIs(X86::LEA64r), OpcodeIs(X86::SHR64ri),
+                          OpcodeIs(X86::SHL64ri), OpcodeIs(X86::SUB64ri32),
+                          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_munmap),
+                          OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, GenerateUpperMunmapTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().generateUpperMunmap(GeneratedCode);
+  EXPECT_THAT(
+      GeneratedCode,
+      ElementsAreArray({OpcodeIs(X86::LEA64r), OpcodeIs(X86::MOV64rr),
+                        OpcodeIs(X86::ADD64rr), OpcodeIs(X86::SHR64ri),
+                        OpcodeIs(X86::SHL64ri), OpcodeIs(X86::ADD64ri32),
+                        IsMovImmediate(X86::MOV64ri, X86::RSI,
+                                       0x0000800000000000 - getpagesize()),
+                        OpcodeIs(X86::SUB64rr),
+                        IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_munmap),
+                        OpcodeIs(X86::SYSCALL)}));
+}
+
+TEST_F(X86Core2TargetTest, GenerateExitSyscallTest) {
+  EXPECT_THAT(State.getExegesisTarget().generateExitSyscall(127),
+              ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 127),
+                          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_exit),
+                          OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, GenerateMmapTest) {
+  EXPECT_THAT(
+      State.getExegesisTarget().generateMmap(0x1000, 4096, 0x2000),
+      ElementsAre(
+          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_mmap),
+          IsMovImmediate(X86::MOV64ri, X86::RDI, 0x1000),
+          IsMovImmediate(X86::MOV64ri, X86::RSI, 4096),
+          IsMovImmediate(X86::MOV64ri, X86::RDX, PROT_READ | PROT_WRITE),
+          IsMovImmediate(X86::MOV64ri, X86::R10,
+                         MAP_SHARED | MAP_FIXED_NOREPLACE),
+          IsMovImmediate(X86::MOV64ri, X86::R8, 0x2000), OpcodeIs(X86::MOV32rm),
+          IsMovImmediate(X86::MOV64ri, X86::R9, 0), OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, GenerateMmapAuxMemTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().generateMmapAuxMem(GeneratedCode);
+  EXPECT_THAT(
+      GeneratedCode,
+      ElementsAre(
+          IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_mmap),
+          IsMovImmediate(
+              X86::MOV64ri, X86::RDI,
+              State.getExegesisTarget().getAuxiliaryMemoryStartAddress()),
+          IsMovImmediate(X86::MOV64ri, X86::RSI, AuxiliaryMemorySize),
+          IsMovImmediate(X86::MOV64ri, X86::RDX, PROT_READ | PROT_WRITE),
+          IsMovImmediate(X86::MOV64ri, X86::R10,
+                         MAP_SHARED | MAP_FIXED_NOREPLACE),
+          IsMovRegToReg(X86::MOV64rr, X86::R8, X86::R12),
+          IsMovImmediate(X86::MOV64ri, X86::R9, 0), OpcodeIs(X86::SYSCALL)));
+}
+
+TEST_F(X86Core2TargetTest, MoveArgumentRegistersTest) {
+  std::vector<MCInst> GeneratedCode;
+  State.getExegesisTarget().moveArgumentRegisters(GeneratedCode);
+  EXPECT_THAT(GeneratedCode,
+              ElementsAre(IsMovRegToReg(X86::MOV64rr, X86::R12, X86::RSI),
+                          IsMovRegToReg(X86::MOV64rr, X86::R13, X86::RDI)));
+}
+#endif // __linux__
+
 } // namespace
 } // namespace exegesis
 } // namespace llvm