Index: docs/CFIVerify.rst =================================================================== --- /dev/null +++ docs/CFIVerify.rst @@ -0,0 +1,88 @@ +============================================== +Control Flow Verification Tool Design Document +============================================== + +Objective +========= + +This document provides an overview of an external tool to verify the protection +mechanisms implemented by Clang's *Control Flow Integrity* (CFI) schemes +(``-fsanitize=cfi``). This tool, provided a binary or DSO, should infer whether +indirect control flow operations are protected by CFI, and should output these +results in a human-readable form. + +This tool should also be added as part of Clang's continuous integration testing +framework, where modifications to the compiler ensure that CFI protection +schemes are still present in the final binary. + +Location +======== + +This tool will be present as a part of the LLVM toolchain, and will reside in +the "/llvm/tools/llvm-cfi-verify" directory, relative to the LLVM trunk. It will +be tested in two methods: + +- Unit tests to validate code sections, present in "/llvm/unittests/llvm-cfi- + verify". +- Integration tests, present in "/llvm/tools/clang/test/LLVMCFIVerify". These + integration tests are part of clang as part of a continuous integration + framework, ensuring updates to the compiler that reduce CFI coverage on + indirect control flow instructions are identified. + +Background +========== + +This tool will continuously validate that CFI directives are properly +implemented around all indirect control flows by analysing the output machine +code. The analysis of machine code is important as it ensures that any bugs +present in linker or compiler do not subvert CFI protections in the final +shipped binary. + +Unprotected indirect control flow instructions will be flagged for manual +review.
These unexpected control flows may simply have not been accounted for in +the compiler implementation of CFI (e.g. indirect jumps to facilitate switch +statements may not be fully protected). + +It may be possible in the future to extend this tool to flag unnecessary CFI +directives (e.g. CFI directives around a static call to a non-polymorphic base +type). This type of directive has no security implications, but may present +performance impacts. + +Design Ideas +============ + +This tool will disassemble binaries and DSOs from their machine code format and +analyse the disassembled instructions. The tool will inspect virtual calls and +indirect function calls. This tool will also inspect indirect jumps, as inlined +functions and jump tables should also be subject to CFI protections. Non-virtual +calls (``-fsanitize=cfi-nvcall``) and cast checks (``-fsanitize=cfi-*cast*``) +are not implemented due to a lack of information available in the machine code. + +The tool would operate by searching for indirect control flow instructions in +the disassembly. A control flow graph would be generated from a small buffer of +the instructions surrounding the 'target' control flow instruction. If the +target instruction is branched-to, the fallthrough of the branch should be the +CFI trap (on x86, this is a ``ud2`` instruction). If the target instruction is +the fallthrough (i.e. immediately succeeds) of a conditional jump, the +conditional jump target should be the CFI trap. If an indirect control flow +instruction does not conform to one of these formats, the target will be noted +as being CFI-unprotected. + +Note that in the second case outlined above (where the target instruction is the +fallthrough of a conditional jump), if the target represents a vcall that takes +arguments, these arguments may be pushed to the stack after the branch but +before the target instruction.
In these cases, a secondary 'spill graph' is +constructed, to ensure the register argument used by the indirect jump/call is +not spilled from the stack at any point in the interim period. If there are no +spills that affect the target register, the target is marked as CFI-protected. + +Other Design Notes +~~~~~~~~~~~~~~~~~~ + +Only machine code sections that are marked as executable will be subject to this +analysis. Non-executable sections do not require analysis as any execution +present in these sections has already violated the control flow integrity. + +Suitable extensions may be made at a later date to include analysis for indirect +control flow operations across DSO boundaries. Currently, these CFI features are +only experimental with an unstable ABI, making them unsuitable for analysis. Index: tools/LLVMBuild.txt =================================================================== --- tools/LLVMBuild.txt +++ tools/LLVMBuild.txt @@ -25,6 +25,7 @@ llvm-as llvm-bcanalyzer llvm-cat + llvm-cfi-verify llvm-cov llvm-cvtres llvm-diff Index: tools/llvm-cfi-verify/CMakeLists.txt =================================================================== --- /dev/null +++ tools/llvm-cfi-verify/CMakeLists.txt @@ -0,0 +1,14 @@ +set(LLVM_LINK_COMPONENTS + AllTargetsAsmPrinters + AllTargetsAsmParsers + AllTargetsDescs + AllTargetsDisassemblers + AllTargetsInfos + MC + MCParser + Support + ) + +add_llvm_tool(llvm-cfi-verify + llvm-cfi-verify.cpp + ) Index: tools/llvm-cfi-verify/LLVMBuild.txt =================================================================== --- /dev/null +++ tools/llvm-cfi-verify/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./tools/llvm-cfi-verify/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details.
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Tool +name = llvm-cfi-verify +parent = Tools +required_libraries = MC MCDisassembler MCParser Support all-targets Index: tools/llvm-cfi-verify/llvm-cfi-verify.cpp =================================================================== --- /dev/null +++ tools/llvm-cfi-verify/llvm-cfi-verify.cpp @@ -0,0 +1,274 @@ +//===-- llvm-cfi-verify.cpp - CFI Verification tool for LLVM --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tool verifies Control Flow Integrity (CFI) instrumentation by static +// binary analysis. See the design document in /docs/CFIVerify.rst for more +// information. +// +// This tool is currently incomplete. It currently only does disassembly for +// object files, and searches through the code for indirect control flow +// instructions, printing them once found.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace llvm; +using namespace llvm::object; + +cl::opt ArgDumpSymbols("sym", cl::desc("Dump the symbol table.")); +cl::opt InputFilename(cl::Positional, cl::desc(""), + cl::Required); + +// TODO(hctim): Replace with ObjectFile::makeTriple() from +// https://reviews.llvm.org/D37719 +static const Triple getTriple(const ObjectFile* Object) { + Triple TheTriple("unknown-unknown-unknown"); + auto Arch = Object->getArch(); + TheTriple.setArch(Triple::ArchType(Arch)); + + // For ARM targets, try to use the build attributes to build determine + // the build target. Target features are also added, but later during + // disassembly. + if (Arch == Triple::arm || Arch == Triple::armeb) { + Object->setARMSubArch(TheTriple); + } + + // TheTriple defaults to ELF, and COFF doesn't have an environment: + // the best we can do here is indicate that it is mach-o. 
+ if (Object->isMachO()) TheTriple.setObjectFormat(Triple::MachO); + + if (Object->isCOFF()) { + const auto COFFObj = dyn_cast(Object); + if (COFFObj->getArch() == Triple::thumb) + TheTriple.setTriple("thumbv7-windows"); + } + + return TheTriple; +} + +static void printSymbols(const ObjectFile* Object) { + for (const SymbolRef& Symbol : Object->symbols()) { + outs() << "Symbol [" << format_hex_no_prefix(Symbol.getValue(), 2) + << "] = "; + + auto SymbolName = Symbol.getName(); + if (SymbolName) { + outs() << *SymbolName; + } else { + outs() << "UNKNOWN"; + } + + if (Symbol.getFlags() & SymbolRef::SF_Hidden) { + outs() << " .hidden"; + } + + outs() << " (Section = "; + + auto SymbolSection = Symbol.getSection(); + if (SymbolSection) { + StringRef SymbolSectionName; + if ((*SymbolSection)->getName(SymbolSectionName)) { + outs() << "UNKNOWN)"; + } else { + outs() << SymbolSectionName << ")"; + } + } else { + outs() << "N/A)"; + } + + outs() << "\n"; + } +} + +int main(int argc, char** argv) { + cl::ParseCommandLineOptions(argc, argv); + + InitializeAllTargetInfos(); + InitializeAllTargetMCs(); + InitializeAllAsmParsers(); + InitializeAllDisassemblers(); + + Expected> BinaryOrErr = createBinary(InputFilename); + if (!BinaryOrErr) { + errs() << "Failed to open file.\n"; + return EXIT_FAILURE; + } + + Binary& Binary = *BinaryOrErr.get().getBinary(); + ObjectFile* Object = dyn_cast(&Binary); + if (!Object) { + errs() << "Disassembling of non-objects not currently supported.\n"; + return EXIT_FAILURE; + } + + Triple TheTriple = getTriple(Object); + std::string TripleName = TheTriple.getTriple(); + std::string ArchName = ""; + std::string ErrorString; + + const Target* TheTarget = + TargetRegistry::lookupTarget(ArchName, TheTriple, ErrorString); + + if (!TheTarget) { + errs() << "Couldn't find target \"" << TheTriple.getTriple() + << "\", failed with error: " << ErrorString << ".\n"; + return EXIT_FAILURE; + } + + SubtargetFeatures Features = Object->getFeatures(); + + 
std::unique_ptr RegisterInfo( + TheTarget->createMCRegInfo(TripleName)); + if (!RegisterInfo) { + errs() << "RegisterInfo required.\n"; + return EXIT_FAILURE; + } + + std::unique_ptr AsmInfo( + TheTarget->createMCAsmInfo(*RegisterInfo, TripleName)); + if (!AsmInfo) { + errs() << "AsmInfo required.\n"; + return EXIT_FAILURE; + } + + std::string MCPU = ""; + std::unique_ptr SubtargetInfo( + TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString())); + if (!SubtargetInfo) { + errs() << "SubtargetInfo required.\n"; + return EXIT_FAILURE; + } + + std::unique_ptr MII(TheTarget->createMCInstrInfo()); + if (!MII) { + errs() << "MII required.\n"; + return EXIT_FAILURE; + } + + MCObjectFileInfo MOFI; + MCContext Context(AsmInfo.get(), RegisterInfo.get(), &MOFI); + + std::unique_ptr Disassembler( + TheTarget->createMCDisassembler(*SubtargetInfo, Context)); + + if (!Disassembler) { + errs() << "No disassembler available for target."; + return EXIT_FAILURE; + } + + std::unique_ptr MIA( + TheTarget->createMCInstrAnalysis(MII.get())); + + std::unique_ptr Printer( + TheTarget->createMCInstPrinter(TheTriple, AsmInfo->getAssemblerDialect(), + *AsmInfo, *MII, *RegisterInfo)); + + if (ArgDumpSymbols) { + printSymbols(Object); + } + + for (const SectionRef& Section : Object->sections()) { + outs() << "Section [" << format_hex_no_prefix(Section.getAddress(), 2) + << "] = "; + StringRef SectionName; + + if (Section.getName(SectionName)) { + outs() << "UNKNOWN.\n"; + } else { + outs() << SectionName << "\n"; + } + + StringRef SectionContents; + if (Section.getContents(SectionContents)) { + outs() << "Failed to retrieve section contents.\n"; + return EXIT_FAILURE; + } + + MCInst Instruction; + size_t InstructionSize; + + ArrayRef SectionBytes((const uint8_t*)SectionContents.data(), + Section.getSize()); + + for (size_t Byte = 0; Byte < Section.getSize();) { + bool BadInstruction = false; + + // Disassemble the instruction. 
+ if (Disassembler->getInstruction( + Instruction, InstructionSize, SectionBytes.drop_front(Byte), 0, + nulls(), outs()) != MCDisassembler::Success) { + BadInstruction = true; + } + + Byte += InstructionSize; + + // Skip instructions that do not affect the control flow. + const auto& InstrDesc = MII->get(Instruction.getOpcode()); + if (BadInstruction || + !InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo)) { + continue; + } + + // Skip instructions that do not operate on register operands. + bool UsesRegisterOperand = false; + for (unsigned i = 0; i < Instruction.getNumOperands(); ++i) { + if (Instruction.getOperand(i).isReg()) { + UsesRegisterOperand = true; + } + } + + if (!UsesRegisterOperand) { + continue; + } + + // Print the instruction address. + outs() << " " + << format_hex(Section.getAddress() + Byte - InstructionSize, 2) + << ": "; + + // Print the instruction bytes. + for (size_t i = 0; i < InstructionSize; ++i) { + outs() << format_hex_no_prefix(SectionBytes[Byte - InstructionSize + i], + 2) + << " "; + } + + // Print the instruction. + outs() << " | " << MII->getName(Instruction.getOpcode()) << " "; + Instruction.dump_pretty(outs(), Printer.get()); + + outs() << "\n"; + } + } + + return EXIT_SUCCESS; +}