diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -85,6 +85,74 @@
   // Precondition: Value must fit into Reg.
   virtual std::vector setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
                                const APInt &Value) const = 0;
+
+  // Generates the code for the lower munmap call, which unmaps the memory
+  // below the currently executing code.
+  virtual std::vector generateLowerMunmap() const {
+    report_fatal_error(
+        "generateLowerMunmap is not implemented on the current architecture");
+  }
+
+  // Generates the code for the upper munmap call, which unmaps the memory
+  // above the currently executing code.
+  virtual std::vector generateUpperMunmap() const {
+    report_fatal_error(
+        "generateUpperMunmap is not implemented on the current architecture");
+  }
+
+  // Generates the code for an exit syscall that exits with ExitCode.
+  virtual std::vector generateExitSyscall(unsigned ExitCode) const {
+    report_fatal_error(
+        "generateExitSyscall is not implemented on the current architecture");
+  }
+
+  // Generates the code to mmap Length bytes of memory at Address. The file
+  // descriptor to map is read at runtime from FileDescriptorAddress.
+  virtual std::vector generateMmap(intptr_t Address, size_t Length,
+                                   intptr_t FileDescriptorAddress) const {
+    report_fatal_error(
+        "generateMmap is not implemented on the current architecture");
+  }
+
+  // Generates the mmap code for the aux memory
+  virtual std::vector generateMmapAuxMem() const {
+    report_fatal_error(
+        "generateMmapAuxMem is not implemented on the current architecture");
+  }
+
+  // Moves argument registers into other registers that won't get clobbered
+  // while making syscalls
+  virtual std::vector moveArgumentRegisters() const {
+    report_fatal_error("moveArgumentRegisters is not implemented on the "
+                       "current architecture");
+  }
+
+  // Sets the stack register to the auxiliary memory so that operations
+  // requiring the stack can be formed (e.g., setting large registers).
+  virtual std::vector setStackRegisterToAuxMem() const {
+    report_fatal_error("setStackRegisterToAuxMem is not implemented on the "
+                       "current architecture");
+  }
+
+  // Returns the address at which the auxiliary memory is mapped.
+  virtual intptr_t getAuxiliaryMemoryStartAddress() const {
+    report_fatal_error("getAuxiliaryMemoryStartAddress is not implemented on "
+                       "the current architecture");
+  }
+
+  // Generates the necessary ioctl system calls to configure the perf
+  // counters. Request is the request code passed to ioctl.
+  virtual std::vector configurePerfCounter(long Request) const {
+    report_fatal_error(
+        "configurePerfCounter is not implemented on the current architecture");
+  }
+
+  // Gets the ABI dependent registers that are used to pass arguments in a
+  // function call
+  virtual std::vector getArgumentRegisters() const {
+    report_fatal_error(
+        "getArgumentRegisters is not implemented on the current architecture");
+  }
 
   // Returns the register pointing to scratch memory, or 0 if this target
   // does not support memory operands. The benchmark function uses the
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -11,6 +11,7 @@
 #include "../ParallelSnippetGenerator.h"
 #include "../SerialSnippetGenerator.h"
 #include "../SnippetGenerator.h"
+#include "../SubprocessMemory.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "MCTargetDesc/X86MCTargetDesc.h"
 #include "X86.h"
@@ -36,6 +37,12 @@
 #include // For _clearfp in ~X86SavedState().
#endif +#ifdef __linux__ +#include +#include +#include +#endif + namespace llvm { namespace exegesis { @@ -686,6 +693,8 @@ return ExegesisTarget::createCounter(CounterName, State, ProcessPID); } + enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 }; + private: void addTargetSpecificPasses(PassManagerBase &PM) const override; @@ -709,6 +718,30 @@ std::vector setRegTo(const MCSubtargetInfo &STI, unsigned Reg, const APInt &Value) const override; +#ifdef __linux__ + std::vector generateLowerMunmap() const override; + + std::vector generateUpperMunmap() const override; + + std::vector generateExitSyscall(unsigned ExitCode) const override; + + std::vector + generateMmap(intptr_t Address, size_t Length, + intptr_t FileDescriptorAddress) const override; + + std::vector generateMmapAuxMem() const override; + + std::vector moveArgumentRegisters() const override; + + std::vector setStackRegisterToAuxMem() const override; + + intptr_t getAuxiliaryMemoryStartAddress() const override; + + std::vector configurePerfCounter(long Request) const override; + + std::vector getArgumentRegisters() const override; +#endif // __linux__ + ArrayRef getUnavailableRegisters() const override { if (DisableUpperSSERegisters) return ArrayRef(kUnavailableRegistersSSE, @@ -942,6 +975,178 @@ return {}; // Not yet implemented. } +#ifdef __linux__ +std::vector ExegesisX86Target::generateLowerMunmap() const { + std::vector LowerMunmapCode; + LowerMunmapCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, 0))); + // Get the current function pointer so we know where to unmap up to. + LowerMunmapCode.push_back(MCInstBuilder(X86::LEA64r) + .addReg(X86::RSI) + .addReg(X86::RIP) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0)); + // The below two instructions round to the nearest 4096 byte page. 
+ LowerMunmapCode.push_back( + MCInstBuilder(X86::SHR64ri).addReg(X86::RSI).addReg(X86::RSI).addImm(12)); + LowerMunmapCode.push_back( + MCInstBuilder(X86::SHL64ri).addReg(X86::RSI).addReg(X86::RSI).addImm(12)); + // Subtract a page from the end of the unmap so we don't unmap the currently + // executing section. + LowerMunmapCode.push_back(MCInstBuilder(X86::SUB64ri32) + .addReg(X86::RSI) + .addReg(X86::RSI) + .addImm(getpagesize())); + LowerMunmapCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_munmap))); + LowerMunmapCode.push_back(MCInstBuilder(X86::SYSCALL)); + return LowerMunmapCode; +} + +std::vector ExegesisX86Target::generateUpperMunmap() const { + std::vector UpperMunmapCode; + UpperMunmapCode.push_back(MCInstBuilder(X86::LEA64r) + .addReg(X86::R8) + .addReg(X86::RIP) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0)); + // Load in RDI from from the argument registers. + UpperMunmapCode.push_back(MCInstBuilder(X86::MOV64rr) + .addReg(X86::RDI) + .addReg(ArgumentRegisters::AuxiliaryMemoryFD)); + UpperMunmapCode.push_back(MCInstBuilder(X86::ADD64rr) + .addReg(X86::RDI) + .addReg(X86::RDI) + .addReg(X86::R8)); + UpperMunmapCode.push_back( + MCInstBuilder(X86::SHR64ri).addReg(X86::RDI).addReg(X86::RDI).addImm(12)); + UpperMunmapCode.push_back( + MCInstBuilder(X86::SHL64ri).addReg(X86::RDI).addReg(X86::RDI).addImm(12)); + UpperMunmapCode.push_back(MCInstBuilder(X86::ADD64ri32) + .addReg(X86::RDI) + .addReg(X86::RDI) + .addImm(getpagesize())); + // Unmap to just one page under the ceiling of the address space. 
+ UpperMunmapCode.push_back(loadImmediate( + X86::RSI, 64, APInt(64, 0x0000800000000000 - getpagesize()))); + UpperMunmapCode.push_back(MCInstBuilder(X86::SUB64rr) + .addReg(X86::RSI) + .addReg(X86::RSI) + .addReg(X86::RDI)); + UpperMunmapCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_munmap))); + UpperMunmapCode.push_back(MCInstBuilder(X86::SYSCALL)); + return UpperMunmapCode; +} + +std::vector +ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const { + std::vector ExitCallCode; + ExitCallCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, ExitCode))); + ExitCallCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_exit))); + ExitCallCode.push_back(MCInstBuilder(X86::SYSCALL)); + return ExitCallCode; +} + +std::vector +ExegesisX86Target::generateMmap(intptr_t Address, size_t Length, + intptr_t FileDescriptorAddress) const { + std::vector MmapCode; + MmapCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_mmap))); + MmapCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, Address))); + MmapCode.push_back(loadImmediate(X86::RSI, 64, APInt(64, Length))); + MmapCode.push_back( + loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE))); + MmapCode.push_back( + loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE))); + // Copy file descriptor location from aux memory into R8 + MmapCode.push_back( + loadImmediate(X86::R8, 64, APInt(64, FileDescriptorAddress))); + // Dereference file descriptor into FD argument register + MmapCode.push_back(MCInstBuilder(X86::MOV32rm) + .addReg(X86::R8D) + .addReg(X86::R8) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0)); + MmapCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0))); + MmapCode.push_back(MCInstBuilder(X86::SYSCALL)); + return MmapCode; +} + +std::vector ExegesisX86Target::generateMmapAuxMem() const { + std::vector MmapAuxMemCode; + MmapAuxMemCode.push_back(loadImmediate(X86::RAX, 64, APInt(64, SYS_mmap))); + MmapAuxMemCode.push_back( + loadImmediate(X86::RDI, 64, 
APInt(64, getAuxiliaryMemoryStartAddress()))); + MmapAuxMemCode.push_back( + loadImmediate(X86::RSI, 64, APInt(64, AuxiliaryMemorySize))); + MmapAuxMemCode.push_back( + loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE))); + MmapAuxMemCode.push_back( + loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE))); + MmapAuxMemCode.push_back(MCInstBuilder(X86::MOV64rr) + .addReg(X86::R8) + .addReg(ArgumentRegisters::CodeSize)); + MmapAuxMemCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0))); + MmapAuxMemCode.push_back(MCInstBuilder(X86::SYSCALL)); + return MmapAuxMemCode; +} + +std::vector ExegesisX86Target::moveArgumentRegisters() const { + std::vector MoveArgumentRegistersCode; + MoveArgumentRegistersCode.push_back(MCInstBuilder(X86::MOV64rr) + .addReg(ArgumentRegisters::CodeSize) + .addReg(X86::RSI)); + MoveArgumentRegistersCode.push_back( + MCInstBuilder(X86::MOV64rr) + .addReg(ArgumentRegisters::AuxiliaryMemoryFD) + .addReg(X86::RDI)); + return MoveArgumentRegistersCode; +} + +std::vector ExegesisX86Target::setStackRegisterToAuxMem() const { + // Moves %rsp to the end of the auxiliary memory + return {MCInstBuilder(X86::MOV64ri) + .addReg(X86::RSP) + .addImm(getAuxiliaryMemoryStartAddress() + AuxiliaryMemorySize)}; +} + +intptr_t ExegesisX86Target::getAuxiliaryMemoryStartAddress() const { + // Return the second to last page in the virtual address space to try and + // prevent interference with memory annotations in the snippet + return 0x00007fffffffe000; +} + +std::vector +ExegesisX86Target::configurePerfCounter(long Request) const { + // TOOD(boomanaiden154): This currently will clobber RAX, RDI, and RSI. 
+ // Fix this, probably by pushing data to the stack + std::vector ConfigurePerfCounterCode; + ConfigurePerfCounterCode.push_back( + loadImmediate(X86::RAX, 64, APInt(64, SYS_ioctl))); + ConfigurePerfCounterCode.push_back( + loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress()))); + ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm) + .addReg(X86::EDI) + .addReg(X86::RDI) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0)); + ConfigurePerfCounterCode.push_back( + loadImmediate(X86::RSI, 64, APInt(64, Request))); + ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::SYSCALL)); + return ConfigurePerfCounterCode; +} + +std::vector ExegesisX86Target::getArgumentRegisters() const { + return {X86::RDI, X86::RSI}; +} +#endif // __linux__ + // Instruction can have some variable operands, and we may want to see how // different operands affect performance. So for each operand position, // precompute all the possible choices we might care about, diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp --- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp @@ -12,6 +12,7 @@ #include #include "MCTargetDesc/X86MCTargetDesc.h" +#include "SubprocessMemory.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" #include "gmock/gmock.h" @@ -19,6 +20,11 @@ #include "llvm/MC/MCInstPrinter.h" +#ifdef __linux__ +#include +#include +#endif // __linux__ + namespace llvm { bool operator==(const MCOperand &a, const MCOperand &b) { @@ -79,6 +85,10 @@ return AllOf(OpcodeIs(Opcode), ElementsAre(IsReg(Reg), IsImm(Value))); } +Matcher IsMovRegToReg(unsigned Opcode, int64_t Reg1, int64_t Reg2) { + return AllOf(OpcodeIs(Opcode), ElementsAre(IsReg(Reg1), IsReg(Reg2))); +} + Matcher IsMovValueToStack(unsigned Opcode, int64_t Value, size_t Offset) { return AllOf(OpcodeIs(Opcode), @@ -576,6 +586,73 @@ 
State.getExegesisTarget().allowAsBackToBack(getInstr(X86::LEA64r))); } +#ifdef __linux__ +TEST_F(X86Core2TargetTest, GenerateLowerMunmapTest) { + EXPECT_THAT(State.getExegesisTarget().generateLowerMunmap(), + ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 0), + OpcodeIs(X86::LEA64r), OpcodeIs(X86::SHR64ri), + OpcodeIs(X86::SHL64ri), OpcodeIs(X86::SUB64ri32), + IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_munmap), + OpcodeIs(X86::SYSCALL))); +} + +TEST_F(X86Core2TargetTest, GenerateUpperMunmapTest) { + EXPECT_THAT( + State.getExegesisTarget().generateUpperMunmap(), + ElementsAreArray({OpcodeIs(X86::LEA64r), OpcodeIs(X86::MOV64rr), + OpcodeIs(X86::ADD64rr), OpcodeIs(X86::SHR64ri), + OpcodeIs(X86::SHL64ri), OpcodeIs(X86::ADD64ri32), + IsMovImmediate(X86::MOV64ri, X86::RSI, + 0x0000800000000000 - getpagesize()), + OpcodeIs(X86::SUB64rr), + IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_munmap), + OpcodeIs(X86::SYSCALL)})); +} + +TEST_F(X86Core2TargetTest, GenerateExitSyscallTest) { + EXPECT_THAT(State.getExegesisTarget().generateExitSyscall(127), + ElementsAre(IsMovImmediate(X86::MOV64ri, X86::RDI, 127), + IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_exit), + OpcodeIs(X86::SYSCALL))); +} + +TEST_F(X86Core2TargetTest, GenerateMmapTest) { + EXPECT_THAT( + State.getExegesisTarget().generateMmap(0x1000, 4096, 0x2000), + ElementsAre( + IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_mmap), + IsMovImmediate(X86::MOV64ri, X86::RDI, 0x1000), + IsMovImmediate(X86::MOV64ri, X86::RSI, 4096), + IsMovImmediate(X86::MOV64ri, X86::RDX, PROT_READ | PROT_WRITE), + IsMovImmediate(X86::MOV64ri, X86::R10, + MAP_SHARED | MAP_FIXED_NOREPLACE), + IsMovImmediate(X86::MOV64ri, X86::R8, 0x2000), OpcodeIs(X86::MOV32rm), + IsMovImmediate(X86::MOV64ri, X86::R9, 0), OpcodeIs(X86::SYSCALL))); +} + +TEST_F(X86Core2TargetTest, GenerateMmapAuxMemTest) { + EXPECT_THAT( + State.getExegesisTarget().generateMmapAuxMem(), + ElementsAre( + IsMovImmediate(X86::MOV64ri, X86::RAX, SYS_mmap), + IsMovImmediate( + 
X86::MOV64ri, X86::RDI, + State.getExegesisTarget().getAuxiliaryMemoryStartAddress()), + IsMovImmediate(X86::MOV64ri, X86::RSI, AuxiliaryMemorySize), + IsMovImmediate(X86::MOV64ri, X86::RDX, PROT_READ | PROT_WRITE), + IsMovImmediate(X86::MOV64ri, X86::R10, + MAP_SHARED | MAP_FIXED_NOREPLACE), + OpcodeIs(X86::MOV64rr), IsMovImmediate(X86::MOV64ri, X86::R9, 0), + OpcodeIs(X86::SYSCALL))); +} + +TEST_F(X86Core2TargetTest, MoveArgumentRegistersTest) { + EXPECT_THAT(State.getExegesisTarget().moveArgumentRegisters(), + ElementsAre(IsMovRegToReg(X86::MOV64rr, X86::R12, X86::RSI), + IsMovRegToReg(X86::MOV64rr, X86::R13, X86::RDI))); +} +#endif // __linux__ + } // namespace } // namespace exegesis } // namespace llvm