Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2292,6 +2292,14 @@
   Group<m_Group>, Flags<[CoreOption,CC1Option]>;
 def mno_speculative_load_hardening : Flag<["-"], "mno-speculative-load-hardening">,
   Group<m_Group>, Flags<[CoreOption]>;
+def mlvi_hardening : Flag<["-"], "mlvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Enable all mitigations for Load Value Injection (LVI)">;
+def mno_lvi_hardening : Flag<["-"], "mno-lvi-hardening">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Disable mitigations for Load Value Injection (LVI)">;
+def mlvi_cfi : Flag<["-"], "mlvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Enable only control-flow mitigations for Load Value Injection (LVI)">;
+def mno_lvi_cfi : Flag<["-"], "mno-lvi-cfi">, Group<m_Group>, Flags<[CoreOption,DriverOption]>,
+  HelpText<"Disable control-flow mitigations for Load Value Injection (LVI)">;
 def mrelax : Flag<["-"], "mrelax">, Group<m_riscv_Features_Group>,
   HelpText<"Enable linker relaxation">;
Index: clang/lib/Driver/ToolChains/Arch/X86.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -146,6 +146,7 @@
   // flags). This is a bit hacky but keeps existing usages working. We should
   // consider deprecating this and instead warn if the user requests external
   // retpoline thunks and *doesn't* request some form of retpolines.
+  auto SpectreOpt = clang::driver::options::ID::OPT_INVALID;
   if (Args.hasArgNoClaim(options::OPT_mretpoline, options::OPT_mno_retpoline,
                          options::OPT_mspeculative_load_hardening,
                          options::OPT_mno_speculative_load_hardening)) {
@@ -153,12 +154,14 @@
                      false)) {
       Features.push_back("+retpoline-indirect-calls");
       Features.push_back("+retpoline-indirect-branches");
+      SpectreOpt = options::OPT_mretpoline;
     } else if (Args.hasFlag(options::OPT_mspeculative_load_hardening,
                             options::OPT_mno_speculative_load_hardening,
                             false)) {
       // On x86, speculative load hardening relies on at least using retpolines
       // for indirect calls.
       Features.push_back("+retpoline-indirect-calls");
+      SpectreOpt = options::OPT_mspeculative_load_hardening;
     }
   } else if (Args.hasFlag(options::OPT_mretpoline_external_thunk,
                           options::OPT_mno_retpoline_external_thunk, false)) {
@@ -166,6 +169,26 @@
     // eventually switch to an error here.
     Features.push_back("+retpoline-indirect-calls");
     Features.push_back("+retpoline-indirect-branches");
+    SpectreOpt = options::OPT_mretpoline_external_thunk;
+  }
+
+  auto LVIOpt = clang::driver::options::ID::OPT_INVALID;
+  if (Args.hasFlag(options::OPT_mlvi_hardening, options::OPT_mno_lvi_hardening,
+                   false)) {
+    Features.push_back("+lvi-load-hardening");
+    Features.push_back("+lvi-cfi"); // load hardening implies CFI protection
+    LVIOpt = options::OPT_mlvi_hardening;
+  } else if (Args.hasFlag(options::OPT_mlvi_cfi, options::OPT_mno_lvi_cfi,
+                          false)) {
+    Features.push_back("+lvi-cfi");
+    LVIOpt = options::OPT_mlvi_cfi;
+  }
+
+  if (SpectreOpt != clang::driver::options::ID::OPT_INVALID &&
+      LVIOpt != clang::driver::options::ID::OPT_INVALID) {
+    D.Diag(diag::err_drv_argument_not_allowed_with)
+        << D.getOpts().getOptionName(SpectreOpt)
+        << D.getOpts().getOptionName(LVIOpt);
   }
 
   // Now add any that the user explicitly requested on the command line,
Index: llvm/lib/CodeGen/CMakeLists.txt
===================================================================
--- llvm/lib/CodeGen/CMakeLists.txt
+++ llvm/lib/CodeGen/CMakeLists.txt
@@ -115,6 +115,9 @@
   ProcessImplicitDefs.cpp
   PrologEpilogInserter.cpp
   PseudoSourceValue.cpp
+  RDFGraph.cpp
+  RDFLiveness.cpp
+  RDFRegisters.cpp
   ReachingDefAnalysis.cpp
   RegAllocBase.cpp
   RegAllocBasic.cpp
Index: llvm/lib/CodeGen/RDFGraph.cpp
===================================================================
--- llvm/lib/CodeGen/RDFGraph.cpp
+++ llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,8 +8,6 @@
 //
 // Target-independent, SSA-based data flow graph for register data flow (RDF).
 //
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -20,6 +18,8 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -753,8 +753,10 @@
   const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
   if (RegisterId R = TLI.getExceptionPointerRegister(PF))
     LR.insert(RegisterRef(R));
-  if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
-    LR.insert(RegisterRef(R));
+  if (!isFuncletEHPersonality(classifyEHPersonality(PF))) {
+    if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
+      LR.insert(RegisterRef(R));
+  }
   return LR;
 }
Index: llvm/lib/CodeGen/RDFLiveness.cpp
===================================================================
--- llvm/lib/CodeGen/RDFLiveness.cpp
+++ llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,9 +22,6 @@
 //   and Embedded Architectures and Compilers", 8 (4),
 //   <10.1145/2086696.2086706>.
 //
-#include "RDFLiveness.h"
-#include "RDFGraph.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -33,6 +30,9 @@
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
Index: llvm/lib/CodeGen/RDFRegisters.cpp
===================================================================
--- llvm/lib/CodeGen/RDFRegisters.cpp
+++ llvm/lib/CodeGen/RDFRegisters.cpp
@@ -6,11 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "RDFRegisters.h"
 #include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/LaneBitmask.h"
 #include "llvm/MC/MCRegisterInfo.h"
Index: llvm/lib/Target/Hexagon/CMakeLists.txt
===================================================================
--- llvm/lib/Target/Hexagon/CMakeLists.txt
+++ llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -64,9 +64,6 @@
   HexagonVLIWPacketizer.cpp
   RDFCopy.cpp
   RDFDeadCode.cpp
-  RDFGraph.cpp
-  RDFLiveness.cpp
-  RDFRegisters.cpp
   )
 
 add_subdirectory(AsmParser)
Index: llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,9 +12,6 @@
 #include "HexagonInstrInfo.h"
 #include "HexagonSubtarget.h"
 #include "MCTargetDesc/HexagonBaseInfo.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
@@ -27,6 +24,9 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/MC/MCInstrDesc.h"
Index: llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -11,9 +11,6 @@
 #include "MCTargetDesc/HexagonBaseInfo.h"
 #include "RDFCopy.h"
 #include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
@@ -24,6 +21,9 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
Index: llvm/lib/Target/Hexagon/RDFCopy.h
===================================================================
--- llvm/lib/Target/Hexagon/RDFCopy.h
+++ llvm/lib/Target/Hexagon/RDFCopy.h
@@ -9,9 +9,9 @@
 #ifndef LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
 #define LLVM_LIB_TARGET_HEXAGON_RDFCOPY_H
 
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include <map>
 #include <vector>
Index: llvm/lib/Target/Hexagon/RDFCopy.cpp
===================================================================
--- llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -11,13 +11,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "RDFCopy.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
-#include "RDFRegisters.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/CodeGen/RDFRegisters.h"
 #include "llvm/CodeGen/TargetOpcodes.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
Index: llvm/lib/Target/Hexagon/RDFDeadCode.h
===================================================================
--- llvm/lib/Target/Hexagon/RDFDeadCode.h
+++ llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -23,8 +23,8 @@
 #ifndef RDF_DEADCODE_H
 #define RDF_DEADCODE_H
 
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
 #include "llvm/ADT/SetVector.h"
 
 namespace llvm {
Index: llvm/lib/Target/Hexagon/RDFDeadCode.cpp
===================================================================
--- llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -9,13 +9,13 @@
 // RDF-based generic dead code elimination.
 
 #include "RDFDeadCode.h"
-#include "RDFGraph.h"
-#include "RDFLiveness.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
 #include "llvm/Support/Debug.h"
 #include <queue>
Index: llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
===================================================================
--- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -3152,6 +3152,64 @@
 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
                                    MCStreamer &Out) {
   Out.emitInstruction(Inst, getSTI());
+
+  if (getSTI().getFeatureBits()[X86::FeatureLVILoadHardening]) {
+    auto Flags = Inst.getFlags();
+    if ((Flags & X86::REP_PREFIX) || (Flags & X86::REPNE_PREFIX)) {
+      switch (Inst.getOpcode()) {
+      case X86::CMPSB:
+      case X86::CMPSW:
+      case X86::CMPSL:
+      case X86::CMPSQ:
+      case X86::SCASB:
+      case X86::SCASW:
+      case X86::SCASL:
+      case X86::SCASQ:
+        Warning(Inst.getLoc(), "Instruction may be vulnerable to LVI and "
+                               "requires manual mitigation");
+        return;
+      }
+    }
+  }
+
+  if (getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity]) {
+    switch (Inst.getOpcode()) {
+    case X86::RET:
+    case X86::RETL:
+    case X86::RETQ:
+    case X86::RETIL:
+    case X86::RETIQ:
+    case X86::RETIW:
+    case X86::JMP16m:
+    case X86::JMP32m:
+    case X86::JMP64m:
+    case X86::JMP64m_REX:
+    case X86::FARJMP16m:
+    case X86::FARJMP32m:
+    case X86::FARJMP64:
+    case X86::CALL16m:
+    case X86::CALL32m:
+    case X86::CALL64m:
+    case X86::FARCALL16m:
+    case X86::FARCALL32m:
+    case X86::FARCALL64:
+      Warning(Inst.getLoc(), "Instruction may be vulnerable to LVI and "
+                             "requires manual mitigation");
+      return;
+    }
+  }
+
+  // If this instruction loads and we're hardening for LVI, emit an LFENCE.
+  if (getSTI().getFeatureBits()[X86::FeatureLVILoadHardening]) {
+    const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+    // LFENCE has the mayLoad property, don't double fence.
+    if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
+      MCInst FenceInst;
+      FenceInst.setOpcode(X86::LFENCE);
+      FenceInst.setLoc(Inst.getLoc());
+      Out.emitInstruction(FenceInst, getSTI());
+    }
+  }
 }
 
 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
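The assembler-level hardening above follows one rule: after any instruction that may load, emit an LFENCE, except after LFENCE itself (which carries the mayLoad flag in the .td files, hence the explicit opcode check). A toy model of that rule over a fake instruction stream, with illustrative names that are not part of the patch:

#include <cstdio>
#include <string>
#include <vector>

struct ToyInst {
  std::string Text;
  bool MayLoad;
};

int main() {
  std::vector<ToyInst> Stream = {{"movq (%rsi), %rax", true},
                                 {"addq $1, %rax", false},
                                 {"lfence", true}}; // mayLoad, but is a fence
  for (const ToyInst &I : Stream) {
    std::printf("\t%s\n", I.Text.c_str());
    if (I.MayLoad && I.Text != "lfence") // never double-fence
      std::printf("\tlfence\n");
  }
}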
Index: llvm/lib/Target/X86/CMakeLists.txt
===================================================================
--- llvm/lib/Target/X86/CMakeLists.txt
+++ llvm/lib/Target/X86/CMakeLists.txt
@@ -51,6 +51,9 @@
   X86InstrInfo.cpp
   X86EvexToVex.cpp
   X86LegalizerInfo.cpp
+  X86LoadValueInjectionIndirectThunks.cpp
+  X86LoadValueInjectionLoadHardening.cpp
+  X86LoadValueInjectionRetHardening.cpp
   X86MCInstLower.cpp
   X86MachineFunctionInfo.cpp
   X86MacroFusion.cpp
Index: llvm/lib/Target/X86/ImmutableGraph.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/ImmutableGraph.h
@@ -0,0 +1,415 @@
+//==========-- ImmutableGraph.h - A fast DAG implementation ---------=========//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef IMMUTABLEGRAPH_H
+#define IMMUTABLEGRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+template <typename _NodeValueT, typename _EdgeValueT, typename _SizeT = int>
+class ImmutableGraph {
+  using Traits = GraphTraits<ImmutableGraph<_NodeValueT, _EdgeValueT> *>;
+  template <typename> friend class ImmutableGraphBuilder;
+
+public:
+  using NodeValueT = _NodeValueT;
+  using EdgeValueT = _EdgeValueT;
+  using size_type = _SizeT;
+  class Node;
+  class Edge {
+    friend class ImmutableGraph;
+    template <typename> friend class ImmutableGraphBuilder;
+    friend Traits;
+
+    Node *__dest;
+    EdgeValueT __value;
+
+  public:
+    EdgeValueT &value() { return __value; }
+  };
+  class Node {
+    friend class ImmutableGraph;
+    template <typename> friend class ImmutableGraphBuilder;
+    friend Traits;
+
+    Edge *__edges;
+    NodeValueT __value;
+
+  public:
+    NodeValueT &value() { return __value; }
+  };
+
+protected:
+  ImmutableGraph(Node *Nodes, size_type NodesSize, Edge *Edges,
+                 size_type EdgesSize)
+      : __nodes{Nodes}, __nodes_size{NodesSize}, __edges{Edges},
+        __edges_size{EdgesSize} {}
+  ImmutableGraph(const ImmutableGraph &) = delete;
+  ImmutableGraph(ImmutableGraph &&) = delete;
+  ImmutableGraph &operator=(const ImmutableGraph &) = delete;
+  ImmutableGraph &operator=(ImmutableGraph &&) = delete;
+
+public:
+  ~ImmutableGraph() {
+    delete[] __edges;
+    delete[] __nodes;
+  }
+
+  Node *nodes_begin() const { return __nodes; }
+  Node *nodes_end() const { return __nodes + __nodes_size; }
+  Edge *edges_begin() const { return __edges; }
+  Edge *edges_end() const { return __edges + __edges_size; }
+  size_type nodes_size() const { return __nodes_size; }
+  size_type edges_size() const { return __edges_size; }
+  bool empty() const { return __nodes_size == 0; }
+
+  class NodeSet {
+    friend class iterator;
+
+    const ImmutableGraph &__g;
+    BitVector __v;
+
+  public:
+    NodeSet(const ImmutableGraph &G, bool ContainsAll = false)
+        : __g{G}, __v{static_cast<unsigned>(__g.nodes_size()), ContainsAll} {}
+    bool insert(Node *N) {
+      size_type Idx = std::distance(__g.nodes_begin(), N);
+      bool AlreadyExists = __v.test(Idx);
+      __v.set(Idx);
+      return !AlreadyExists;
+    }
+    void erase(Node *N) {
+      size_type Idx = std::distance(__g.nodes_begin(), N);
+      __v.reset(Idx);
+    }
+    bool contains(Node *N) const {
+      size_type Idx = std::distance(__g.nodes_begin(), N);
+      return __v.test(Idx);
+    }
+    void clear() { __v.reset(); }
+    bool empty() const { return __v.none(); }
+    /// Return the number of elements in the set
+    size_type count() const { return __v.count(); }
+    /// Return the size of the set's domain
+    size_type size() const { return __v.size(); }
+    /// Set union
+    NodeSet &operator|=(const NodeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v |= RHS.__v;
+      return *this;
+    }
+    /// Set intersection
+    NodeSet &operator&=(const NodeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v &= RHS.__v;
+      return *this;
+    }
+    /// Set disjoint union
+    NodeSet &operator^=(const NodeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v ^= RHS.__v;
+      return *this;
+    }
+
+    using index_iterator = typename BitVector::const_set_bits_iterator;
+    index_iterator index_begin() const { return __v.set_bits_begin(); }
+    index_iterator index_end() const { return __v.set_bits_end(); }
+    void set(size_type Idx) { __v.set(Idx); }
+    void reset(size_type Idx) { __v.reset(Idx); }
+
+    class iterator {
+      const NodeSet &__set;
+      size_type __current;
+
+      void advance() {
+        assert(__current != -1);
+        __current = __set.__v.find_next(__current);
+      }
+
+    public:
+      iterator(const NodeSet &Set, size_type Begin)
+          : __set{Set}, __current{Begin} {}
+      iterator operator++(int) {
+        iterator Tmp = *this;
+        advance();
+        return Tmp;
+      }
+      iterator &operator++() {
+        advance();
+        return *this;
+      }
+      Node *operator*() const {
+        assert(__current != -1);
+        return __set.__g.nodes_begin() + __current;
+      }
+      bool operator==(const iterator &other) const {
+        assert(&this->__set == &other.__set);
+        return this->__current == other.__current;
+      }
+      bool operator!=(const iterator &other) const { return !(*this == other); }
+    };
+
+    iterator begin() const { return iterator{*this, __v.find_first()}; }
+    iterator end() const { return iterator{*this, -1}; }
+  };
+
+  class EdgeSet {
+    const ImmutableGraph &__g;
+    BitVector __v;
+
+  public:
+    EdgeSet(const ImmutableGraph &G, bool ContainsAll = false)
+        : __g{G}, __v{static_cast<unsigned>(__g.edges_size()), ContainsAll} {}
+    bool insert(Edge *E) {
+      size_type Idx = std::distance(__g.edges_begin(), E);
+      bool AlreadyExists = __v.test(Idx);
+      __v.set(Idx);
+      return !AlreadyExists;
+    }
+    void erase(Edge *E) {
+      size_type Idx = std::distance(__g.edges_begin(), E);
+      __v.reset(Idx);
+    }
+    bool contains(Edge *E) const {
+      size_type Idx = std::distance(__g.edges_begin(), E);
+      return __v.test(Idx);
+    }
+    void clear() { __v.reset(); }
+    bool empty() const { return __v.none(); }
+    /// Return the number of elements in the set
+    size_type count() const { return __v.count(); }
+    /// Return the size of the set's domain
+    size_type size() const { return __v.size(); }
+    /// Set union
+    EdgeSet &operator|=(const EdgeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v |= RHS.__v;
+      return *this;
+    }
+    /// Set intersection
+    EdgeSet &operator&=(const EdgeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v &= RHS.__v;
+      return *this;
+    }
+    /// Set disjoint union
+    EdgeSet &operator^=(const EdgeSet &RHS) {
+      assert(&this->__g == &RHS.__g);
+      __v ^= RHS.__v;
+      return *this;
+    }
+
+    using index_iterator = typename BitVector::const_set_bits_iterator;
+    index_iterator index_begin() const { return __v.set_bits_begin(); }
+    index_iterator index_end() const { return __v.set_bits_end(); }
+    void set(size_type Idx) { __v.set(Idx); }
+    void reset(size_type Idx) { __v.reset(Idx); }
+
+    class iterator {
+      const EdgeSet &__set;
+      size_type __current;
+
+      void advance() {
+        assert(__current != -1);
+        __current = __set.__v.find_next(__current);
+      }
+
+    public:
+      iterator(const EdgeSet &Set, size_type Begin)
+          : __set{Set}, __current{Begin} {}
+      iterator operator++(int) {
+        iterator Tmp = *this;
+        advance();
+        return Tmp;
+      }
+      iterator &operator++() {
+        advance();
+        return *this;
+      }
+      Edge *operator*() const {
+        assert(__current != -1);
+        return __set.__g.edges_begin() + __current;
+      }
+      bool operator==(const iterator &other) const {
+        assert(&this->__set == &other.__set);
+        return this->__current == other.__current;
+      }
+      bool operator!=(const iterator &other) const { return !(*this == other); }
+    };
+
+    iterator begin() const { return iterator{*this, __v.find_first()}; }
+    iterator end() const { return iterator{*this, -1}; }
+  };
+
+private:
+  Node *__nodes;
+  size_type __nodes_size;
+  Edge *__edges;
+  size_type __edges_size;
+};
+
+template <typename GraphT> class ImmutableGraphBuilder {
+  using NodeValueT = typename GraphT::NodeValueT;
+  using EdgeValueT = typename GraphT::EdgeValueT;
+  static_assert(
+      std::is_base_of<ImmutableGraph<NodeValueT, EdgeValueT>, GraphT>::value,
+      "Template argument to ImmutableGraphBuilder must derive from "
+      "ImmutableGraph<>");
+  using size_type = typename GraphT::size_type;
+  using NodeSet = typename GraphT::NodeSet;
+  using Node = typename GraphT::Node;
+  using EdgeSet = typename GraphT::EdgeSet;
+  using Edge = typename GraphT::Edge;
+  using BuilderEdge = std::pair<EdgeValueT, size_type>;
+  using EdgeList = std::vector<BuilderEdge>;
+  using BuilderVertex = std::pair<NodeValueT, EdgeList>;
+  using VertexVec = std::vector<BuilderVertex>;
+
+public:
+  using NodeRef = size_type;
+
+  NodeRef addVertex(const NodeValueT &V) {
+    auto I = __adj_list.emplace(__adj_list.end(), V, EdgeList{});
+    return std::distance(__adj_list.begin(), I);
+  }
+
+  void addEdge(const EdgeValueT &E, NodeRef From, NodeRef To) {
+    __adj_list[From].second.emplace_back(E, To);
+  }
+
+  bool empty() const { return __adj_list.empty(); }
+
+  template <typename... ArgT> GraphT *get(ArgT &&... Args) {
+    size_type VertexSize = __adj_list.size(), EdgeSize = 0;
+    for (const auto &V : __adj_list) {
+      EdgeSize += V.second.size();
+    }
+    auto *VertexArray = new Node[VertexSize + 1 /* terminator node */];
+    auto *EdgeArray = new Edge[EdgeSize];
+    size_type VI = 0, EI = 0;
+    for (; VI < static_cast<size_type>(__adj_list.size()); ++VI) {
+      VertexArray[VI].__value = std::move(__adj_list[VI].first);
+      VertexArray[VI].__edges = &EdgeArray[EI];
+      auto NumEdges = static_cast<size_type>(__adj_list[VI].second.size());
+      if (NumEdges > 0) {
+        for (size_type VEI = 0; VEI < NumEdges; ++VEI, ++EI) {
+          auto &E = __adj_list[VI].second[VEI];
+          EdgeArray[EI].__value = std::move(E.first);
+          EdgeArray[EI].__dest = VertexArray + E.second;
+        }
+      }
+    }
+    assert(VI == VertexSize && EI == EdgeSize && "Gadget graph malformed");
+    VertexArray[VI].__edges = EdgeArray + EdgeSize; // terminator node
+    return new GraphT{VertexArray, VertexSize, EdgeArray, EdgeSize,
+                      std::forward<ArgT>(Args)...};
+  }
+
+  template <typename... ArgT>
+  static GraphT *trim(const GraphT &G, const NodeSet &TrimNodes,
+                      const EdgeSet &TrimEdges, ArgT &&... Args) {
+    size_type NewVertexSize = TrimNodes.size() - TrimNodes.count();
+    size_type NewEdgeSize = TrimEdges.size() - TrimEdges.count();
+    auto *NewVertexArray = new Node[NewVertexSize + 1 /* terminator node */];
+    auto *NewEdgeArray = new Edge[NewEdgeSize];
+    size_type TrimmedNodesSoFar = 0,
+              *TrimmedNodes = new size_type[TrimNodes.size()];
+    for (size_type I = 0; I < TrimNodes.size(); ++I) {
+      TrimmedNodes[I] = TrimmedNodesSoFar;
+      if (TrimNodes.contains(G.nodes_begin() + I))
+        ++TrimmedNodesSoFar;
+    }
+    size_type VertexI = 0, EdgeI = 0;
+    for (Node *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) {
+      if (TrimNodes.contains(NI))
+        continue;
+      size_type NewNumEdges =
+          static_cast<size_type>((NI + 1)->__edges - NI->__edges) > 0
+              ? std::count_if(
+                    NI->__edges, (NI + 1)->__edges,
+                    [&TrimEdges](Edge &E) { return !TrimEdges.contains(&E); })
+              : 0;
+      NewVertexArray[VertexI].__value = NI->__value;
+      NewVertexArray[VertexI].__edges = &NewEdgeArray[EdgeI];
+      if (NewNumEdges > 0) {
+        for (Edge *EI = NI->__edges, *EE = (NI + 1)->__edges; EI != EE; ++EI) {
+          if (TrimEdges.contains(EI))
+            continue;
+          NewEdgeArray[EdgeI].__value = EI->__value;
+          size_type DestIdx = std::distance(G.nodes_begin(), EI->__dest);
+          size_type NewIdx = DestIdx - TrimmedNodes[DestIdx];
+          assert(NewIdx < NewVertexSize);
+          NewEdgeArray[EdgeI].__dest = NewVertexArray + NewIdx;
+          ++EdgeI;
+        }
+      }
+      ++VertexI;
+    }
+    delete[] TrimmedNodes;
+    assert(VertexI == NewVertexSize && EdgeI == NewEdgeSize &&
+           "Gadget graph malformed");
+    NewVertexArray[VertexI].__edges = NewEdgeArray + NewEdgeSize;
+    return new GraphT{NewVertexArray, NewVertexSize, NewEdgeArray, NewEdgeSize,
+                      std::forward<ArgT>(Args)...};
+  }
+
+private:
+  VertexVec __adj_list;
+};
+
+template <typename NodeValueT, typename EdgeValueT, typename SizeT>
+struct GraphTraits<ImmutableGraph<NodeValueT, EdgeValueT, SizeT> *> {
+  using GraphT = ImmutableGraph<NodeValueT, EdgeValueT, SizeT>;
+  using NodeRef = typename GraphT::Node *;
+  using EdgeRef = typename GraphT::Edge &;
+
+  static NodeRef edge_dest(EdgeRef E) { return E.__dest; }
+  using ChildIteratorType =
+      mapped_iterator<typename GraphT::Edge *, decltype(&edge_dest)>;
+
+  static NodeRef getEntryNode(GraphT *G) { return G->nodes_begin(); }
+  static ChildIteratorType child_begin(NodeRef N) {
+    return {N->__edges, &edge_dest};
+  }
+  static ChildIteratorType child_end(NodeRef N) {
+    return {(N + 1)->__edges, &edge_dest};
+  }
+
+  static NodeRef getNode(typename GraphT::Node &N) { return NodeRef{&N}; }
+  using nodes_iterator =
+      mapped_iterator<typename GraphT::Node *, decltype(&getNode)>;
+  static nodes_iterator nodes_begin(GraphT *G) {
+    return {G->nodes_begin(), &getNode};
+  }
+  static nodes_iterator nodes_end(GraphT *G) {
+    return {G->nodes_end(), &getNode};
+  }
+
+  using ChildEdgeIteratorType = typename GraphT::Edge *;
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->__edges;
+  }
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) {
+    return (N + 1)->__edges;
+  }
+  static typename GraphT::size_type size(GraphT *G) { return G->nodes_size(); }
+};
+
+} // end namespace llvm
+
+#endif // IMMUTABLEGRAPH_H
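A minimal usage sketch for the header above (illustrative; ToyGraph and toyGraphDemo are not part of the patch). It shows the derived-class/builder pattern that MachineGadgetGraph uses later in this patch, plus the BitVector-backed NodeSet operations whose constant-time behavior the load-hardening pass relies on:

#include "ImmutableGraph.h"
#include <memory>

namespace {
struct ToyGraph : llvm::ImmutableGraph<int, int> {
  using GraphT = llvm::ImmutableGraph<int, int>;
  // Re-expose the protected base constructor so the builder can call it.
  ToyGraph(Node *Nodes, size_type NodesSize, Edge *Edges, size_type EdgesSize)
      : GraphT{Nodes, NodesSize, Edges, EdgesSize} {}
};
} // end anonymous namespace

void toyGraphDemo() {
  llvm::ImmutableGraphBuilder<ToyGraph> Builder;
  auto A = Builder.addVertex(1); // node values
  auto B = Builder.addVertex(2);
  Builder.addEdge(/*EdgeValue=*/7, A, B);
  std::unique_ptr<ToyGraph> G{Builder.get()}; // freeze into the flat arrays

  // Sets over nodes are single bits indexed by array position.
  ToyGraph::NodeSet Reached{*G};
  Reached.insert(G->nodes_begin());          // one load + one store
  (void)Reached.contains(G->nodes_begin()); // one load
}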
Index: llvm/lib/Target/X86/X86.h
===================================================================
--- llvm/lib/Target/X86/X86.h
+++ llvm/lib/Target/X86/X86.h
@@ -137,6 +137,9 @@
                                       X86Subtarget &,
                                       X86RegisterBankInfo &);
 
+FunctionPass *createX86LoadValueInjectionIndirectThunksPass();
+FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
+FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
@@ -152,6 +155,9 @@
 void initializeX86ExecutionDomainFixPass(PassRegistry &);
 void initializeX86ExpandPseudoPass(PassRegistry &);
 void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionIndirectThunksPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
+void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
 void initializeX86OptimizeLEAPassPass(PassRegistry &);
 void initializeX86SpeculativeLoadHardeningPassPass(PassRegistry &);
Index: llvm/lib/Target/X86/X86.td
===================================================================
--- llvm/lib/Target/X86/X86.td
+++ llvm/lib/Target/X86/X86.td
@@ -426,6 +426,22 @@
           "ourselves. Only has effect when combined with some other retpoline "
           "feature", [FeatureRetpolineIndirectCalls]>;
 
+// Mitigate LVI attacks against indirect calls/branches and call returns
+def FeatureLVIControlFlowIntegrity
+    : SubtargetFeature<
+          "lvi-cfi", "UseLVIControlFlowIntegrity", "true",
+          "Prevent indirect calls/branches from using a memory operand, and "
+          "precede all indirect calls/branches from a register with an "
+          "LFENCE instruction to serialize control flow. Also decompose RET "
+          "instructions into a POP+LFENCE+JMP sequence.">;
+
+// Mitigate LVI attacks against data loads
+def FeatureLVILoadHardening
+    : SubtargetFeature<
+          "lvi-load-hardening", "UseLVILoadHardening", "true",
+          "Insert LFENCE instructions to prevent data speculatively injected "
+          "into loads from being used maliciously.">;
+
 // Direct Move instructions.
 def FeatureMOVDIRI  : SubtargetFeature<"movdiri", "HasMOVDIRI", "true",
                                        "Support movdiri instruction">;
Index: llvm/lib/Target/X86/X86FastISel.cpp
===================================================================
--- llvm/lib/Target/X86/X86FastISel.cpp
+++ llvm/lib/Target/X86/X86FastISel.cpp
@@ -3208,7 +3208,8 @@
     return false;
 
   // Functions using retpoline for indirect calls need to use SDISel.
-  if (Subtarget->useRetpolineIndirectCalls())
+  if (Subtarget->useRetpolineIndirectCalls() ||
+      Subtarget->useLVIControlFlowIntegrity())
     return false;
 
   // Handle only C, fastcc, and webkit_js calling conventions for now.
Index: llvm/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -962,7 +962,8 @@
   bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
 
   // FIXME: Add retpoline support and remove this.
-  if (Is64Bit && IsLargeCodeModel && STI.useRetpolineIndirectCalls())
+  if (Is64Bit && IsLargeCodeModel && (STI.useRetpolineIndirectCalls() ||
+                                      STI.useLVIControlFlowIntegrity()))
     report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
 
@@ -2703,7 +2704,7 @@
   // is laid out within 2^31 bytes of each function body, but this seems
   // to be sufficient for JIT.
   // FIXME: Add retpoline support and remove the error here..
-  if (STI.useRetpolineIndirectCalls())
+  if (STI.useRetpolineIndirectCalls() || STI.useLVIControlFlowIntegrity())
     report_fatal_error("Emitting morestack calls on 64-bit with the large "
                        "code model and retpoline not yet implemented.");
   BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1023,7 +1023,8 @@
     if (OptLevel != CodeGenOpt::None &&
         // Only do this when the target can fold the load into the call or
         // jmp.
-        !Subtarget->useRetpolineIndirectCalls() &&
+        !(Subtarget->useRetpolineIndirectCalls() ||
+          Subtarget->useLVIControlFlowIntegrity()) &&
         ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) ||
          (N->getOpcode() == X86ISD::TC_RETURN &&
           (Subtarget->is64Bit() ||
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30555,7 +30555,8 @@
 
 bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
   // If the subtarget is using retpolines, we need to not generate jump tables.
-  if (Subtarget.useRetpolineIndirectBranches())
+  if (Subtarget.useRetpolineIndirectBranches() ||
+      Subtarget.useLVIControlFlowIntegrity())
     return false;
 
   // Otherwise, fallback on the generic logic.
@@ -31758,22 +31759,26 @@
   return BB;
 }
 
-static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
+static unsigned getOpcodeForThunk(unsigned RPOpc) {
   switch (RPOpc) {
   case X86::RETPOLINE_CALL32:
     return X86::CALLpcrel32;
   case X86::RETPOLINE_CALL64:
     return X86::CALL64pcrel32;
+  case X86::LVI_THUNK_CALL64:
+    return X86::CALL64pcrel32;
   case X86::RETPOLINE_TCRETURN32:
     return X86::TCRETURNdi;
   case X86::RETPOLINE_TCRETURN64:
     return X86::TCRETURNdi64;
+  case X86::LVI_THUNK_TCRETURN64:
+    return X86::TCRETURNdi64;
   }
   llvm_unreachable("not retpoline opcode");
 }
 
-static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
-                                      unsigned Reg) {
+static const char *getThunkSymbol(const X86Subtarget &Subtarget,
+                                  unsigned Reg) {
   if (Subtarget.useRetpolineExternalThunk()) {
     // When using an external thunk for retpolines, we pick names that match the
     // names GCC happens to use as well. This helps simplify the implementation
@@ -31808,6 +31813,12 @@
     llvm_unreachable("unexpected reg for retpoline");
   }
 
+  if (Subtarget.useLVIControlFlowIntegrity()) {
+    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
+    assert(Reg == X86::R11 && "Invalid register for LVI CFI");
+    return "__x86_indirect_thunk_r11";
+  }
+
   // When targeting an internal COMDAT thunk use an LLVM-specific name.
   switch (Reg) {
   case X86::EAX:
@@ -31837,7 +31848,7 @@
   DebugLoc DL = MI.getDebugLoc();
   const X86InstrInfo *TII = Subtarget.getInstrInfo();
   Register CalleeVReg = MI.getOperand(0).getReg();
-  unsigned Opc = getOpcodeForRetpoline(MI.getOpcode());
+  unsigned Opc = getOpcodeForThunk(MI.getOpcode());
 
   // Find an available scratch register to hold the callee. On 64-bit, we can
   // just use R11, but we scan for uses anyway to ensure we don't generate
@@ -31871,7 +31882,7 @@
     report_fatal_error("calling convention incompatible with retpoline, no "
                        "available registers");
 
-  const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
+  const char *Symbol = getThunkSymbol(Subtarget, AvailableReg);
 
   BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
       .addReg(CalleeVReg);
@@ -32649,8 +32660,10 @@
     return EmitLoweredTLSAddr(MI, BB);
   case X86::RETPOLINE_CALL32:
   case X86::RETPOLINE_CALL64:
+  case X86::LVI_THUNK_CALL64:
   case X86::RETPOLINE_TCRETURN32:
   case X86::RETPOLINE_TCRETURN64:
+  case X86::LVI_THUNK_TCRETURN64:
     return EmitLoweredRetpoline(MI, BB);
   case X86::CATCHRET:
     return EmitLoweredCatchRet(MI, BB);
Index: llvm/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/lib/Target/X86/X86InstrCompiler.td
+++ llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1199,13 +1199,19 @@
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (TCRETURNri64 ptr_rc_tailcall:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls,
+                    NotUseLVIIndirectThunks]>;
 
 // Don't fold loads into X86tcret requiring more than 6 regs.
 // There wouldn't be enough scratch registers for base+index.
 def : Pat<(X86tcret_6regs (load addr:$dst), imm:$off),
           (TCRETURNmi64 addr:$dst, imm:$off)>,
-          Requires<[In64BitMode, NotUseRetpolineIndirectCalls]>;
+          Requires<[In64BitMode, NotUseRetpolineIndirectCalls,
+                    NotUseLVIIndirectThunks]>;
+
+def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
+          (LVI_THUNK_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
+          Requires<[In64BitMode, UseLVIIndirectThunks]>;
 
 def : Pat<(X86tcret ptr_rc_tailcall:$dst, imm:$off),
           (RETPOLINE_TCRETURN64 ptr_rc_tailcall:$dst, imm:$off)>,
Index: llvm/lib/Target/X86/X86InstrControl.td
===================================================================
--- llvm/lib/Target/X86/X86InstrControl.td
+++ llvm/lib/Target/X86/X86InstrControl.td
@@ -334,11 +334,13 @@
                         Requires<[In64BitMode]>;
   def CALL64r       : I<0xFF, MRM2r, (outs), (ins GR64:$dst),
                         "call{q}\t{*}$dst", [(X86call GR64:$dst)]>,
-                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls]>;
+                      Requires<[In64BitMode,NotUseRetpolineIndirectCalls,
+                                NotUseLVIIndirectThunks]>;
   def CALL64m       : I<0xFF, MRM2m, (outs), (ins i64mem:$dst),
                         "call{q}\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
                       Requires<[In64BitMode,FavorMemIndirectCall,
-                                NotUseRetpolineIndirectCalls]>;
+                                NotUseRetpolineIndirectCalls,
+                                NotUseLVIIndirectThunks]>;
 
   // Non-tracking calls for IBT, use with caution.
   let isCodeGenOnly = 1 in {
@@ -400,6 +402,9 @@
   def RETPOLINE_CALL64 :
     PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
             Requires<[In64BitMode,UseRetpolineIndirectCalls]>;
+  def LVI_THUNK_CALL64 :
+    PseudoI<(outs), (ins GR64:$dst), [(X86call GR64:$dst)]>,
+            Requires<[In64BitMode,UseLVIIndirectThunks]>;
 
   // Retpoline variant of indirect tail calls.
   let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
@@ -408,6 +413,12 @@
     def RETPOLINE_TCRETURN32 :
       PseudoI<(outs), (ins GR32:$dst, i32imm:$offset), []>;
   }
+
+  // LVI thunk variant of indirect tail calls.
+  let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+    def LVI_THUNK_TCRETURN64 :
+      PseudoI<(outs), (ins GR64:$dst, i32imm:$offset), []>;
+  }
 }
 
 // Conditional tail calls are similar to the above, but they are branches
Index: llvm/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.td
+++ llvm/lib/Target/X86/X86InstrInfo.td
@@ -1005,6 +1005,8 @@
 def HasMFence    : Predicate<"Subtarget->hasMFence()">;
 def UseRetpolineIndirectCalls : Predicate<"Subtarget->useRetpolineIndirectCalls()">;
 def NotUseRetpolineIndirectCalls : Predicate<"!Subtarget->useRetpolineIndirectCalls()">;
+def UseLVIIndirectThunks : Predicate<"Subtarget->useLVIControlFlowIntegrity()">;
+def NotUseLVIIndirectThunks : Predicate<"!Subtarget->useLVIControlFlowIntegrity()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
Index: llvm/lib/Target/X86/X86LoadValueInjectionIndirectThunks.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionIndirectThunks.cpp
@@ -0,0 +1,196 @@
+//=- X86LoadValueInjectionIndirectThunks.cpp - Construct LVI thunks for x86 -=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass replaces each indirect call/jump with a direct call
+/// to a thunk that looks like:
+/// ```
+/// lfence
+/// jmpq *%r11
+/// ```
+/// This ensures that if the value in register %r11 was loaded from memory,
+/// then the value in %r11 is (architecturally) correct prior to the jump.
+///
+/// Note: A lot of this code was lifted from X86RetpolineThunks.cpp.
+///
+//===----------------------------------------------------------------------===//
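To make the thunk transformation concrete, here is an illustrative snippet (not from the patch; register choices in the comments are a sketch of typical codegen) showing what LVI-CFI does to an ordinary indirect call:

using Callback = int (*)(int);

int invoke(Callback CB, int V) {
  return CB(V); // indirect call
  // Without LVI-CFI, this may lower to:   callq  *%rax
  // With LVI-CFI (sketch):                movq   %rax, %r11
  //                                       callq  __x86_indirect_thunk_r11
  // where the thunk body emitted by this pass is:
  //   __x86_indirect_thunk_r11:
  //     lfence
  //     jmpq *%r11
}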
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-thunks"
+#define DEBUG_TYPE PASS_KEY
+
+static const char R11ThunkName[] = "__x86_indirect_thunk_r11";
+
+namespace {
+class X86LoadValueInjectionIndirectThunksPass : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionIndirectThunksPass() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Indirect Thunks Pass";
+  }
+  bool doInitialization(Module &M) override;
+  bool runOnMachineFunction(MachineFunction &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+    AU.addRequired<MachineModuleInfoWrapperPass>();
+    AU.addPreserved<MachineModuleInfoWrapperPass>();
+  }
+
+  static char ID;
+
+private:
+  MachineModuleInfo *MMI;
+  const TargetMachine *TM;
+  const X86Subtarget *STI;
+  const X86InstrInfo *TII;
+
+  bool InsertedThunks;
+
+  void createThunkFunction(Module &M, StringRef Name);
+  void populateThunk(MachineFunction &MF, unsigned Reg);
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionIndirectThunksPass::ID = 0;
+
+bool X86LoadValueInjectionIndirectThunksPass::doInitialization(Module &M) {
+  InsertedThunks = false;
+  return false;
+}
+
+bool X86LoadValueInjectionIndirectThunksPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  STI = &MF.getSubtarget<X86Subtarget>();
+  if (!(STI->hasSSE2() || STI->is64Bit())) {
+    // FIXME: support 32-bit
+    return false;
+  }
+
+  // Don't skip functions with the "optnone" attr but participate in opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F)) {
+    return false;
+  }
+
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  TM = &MF.getTarget();
+  TII = STI->getInstrInfo();
+  MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+  Module &M = const_cast<Module &>(*MMI->getModule());
+
+  // If this function is not a thunk, check to see if we need to insert
+  // a thunk.
+  if (MF.getName() != R11ThunkName) {
+    // If we've already inserted a thunk, nothing else to do.
+    if (InsertedThunks) {
+      return false;
+    }
+
+    // Only add a thunk if one of the functions has the LVI-CFI feature
+    // enabled in its subtarget
+    if (!STI->useLVIControlFlowIntegrity()) {
+      return false;
+    }
+
+    // Otherwise, we need to insert the thunk.
+    // WARNING: This is not really a well behaving thing to do in a function
+    // pass. We extract the module and insert a new function (and machine
+    // function) directly into the module.
+    LLVM_DEBUG(dbgs() << "Creating thunk procedure" << '\n');
+    createThunkFunction(M, R11ThunkName);
+    InsertedThunks = true;
+    return true;
+  }
+
+  assert(MF.getName() == "__x86_indirect_thunk_r11" &&
+         "Should only have an r11 thunk on 64-bit targets");
+  LLVM_DEBUG(dbgs() << "Populating thunk" << '\n');
+  populateThunk(MF, X86::R11);
+  return true;
+}
+
+void X86LoadValueInjectionIndirectThunksPass::createThunkFunction(
+    Module &M, StringRef Name) {
+  assert(Name == R11ThunkName && "Created a thunk with an unexpected prefix!");
+
+  LLVMContext &Ctx = M.getContext();
+  auto Type = FunctionType::get(Type::getVoidTy(Ctx), false);
+  Function *F =
+      Function::Create(Type, GlobalValue::LinkOnceODRLinkage, Name, &M);
+  F->setVisibility(GlobalValue::HiddenVisibility);
+  F->setComdat(M.getOrInsertComdat(Name));
+
+  // Add Attributes so that we don't create a frame, unwind information, or
+  // inline.
+  AttrBuilder B;
+  B.addAttribute(llvm::Attribute::NoUnwind);
+  B.addAttribute(llvm::Attribute::Naked);
+  F->addAttributes(llvm::AttributeList::FunctionIndex, B);
+
+  // Populate our function a bit so that we can verify.
+  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+  IRBuilder<> Builder(Entry);
+
+  Builder.CreateRetVoid();
+
+  // MachineFunctions/MachineBasicBlocks aren't created automatically for the
+  // IR-level constructs we already made. Create them and insert them into the
+  // module.
+  MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
+  MachineBasicBlock *EntryMBB = MF.CreateMachineBasicBlock(Entry);
+
+  // Insert EntryMBB into MF. It's not in the module until we do this.
+  MF.insert(MF.end(), EntryMBB);
+}
+
+void X86LoadValueInjectionIndirectThunksPass::populateThunk(MachineFunction &MF,
+                                                            unsigned Reg) {
+  // Set MF properties. We never use vregs...
+  MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+
+  // Grab the entry MBB and erase any other blocks. O0 codegen appears to
+  // generate two bbs for the entry block.
+  MachineBasicBlock *Entry = &MF.front();
+  Entry->clear();
+  while (MF.size() > 1)
+    MF.erase(std::next(MF.begin()));
+
+  BuildMI(Entry, DebugLoc(), TII->get(X86::LFENCE));
+  BuildMI(Entry, DebugLoc(), TII->get(X86::JMP64r)).addReg(Reg);
+  Entry->addLiveIn(Reg);
+  return;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionIndirectThunksPass, PASS_KEY,
+                      "X86 LVI indirect thunk inserter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineModuleInfoWrapperPass)
+INITIALIZE_PASS_END(X86LoadValueInjectionIndirectThunksPass, PASS_KEY,
+                    "X86 LVI indirect thunk inserter", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionIndirectThunksPass() {
+  return new X86LoadValueInjectionIndirectThunksPass();
+}
Index: llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -0,0 +1,874 @@
+//==-- X86LoadValueInjectionLoadHardening.cpp - LVI load hardening for x86 --=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: This pass finds Load Value Injection (LVI) gadgets consisting
+/// of a load from memory (i.e., SOURCE), and any operation that may transmit
+/// the value loaded from memory over a covert channel, or use the value loaded
+/// from memory to determine a branch/call target (i.e., SINK). After finding
+/// all such gadgets in a given function, the pass minimally inserts LFENCE
+/// instructions in such a manner that the following property is satisfied: for
+/// all SOURCE+SINK pairs, all paths in the CFG from SOURCE to SINK contain at
+/// least one LFENCE instruction. The algorithm that implements this minimal
+/// insertion is influenced by an academic paper that minimally inserts memory
+/// fences for high-performance concurrent programs:
+/// http://www.cs.ucr.edu/~lesani/companion/oopsla15/OOPSLA15.pdf
+/// The algorithm implemented in this pass is as follows:
+/// 1. Build a condensed CFG (i.e., a GadgetGraph) consisting only of the
+/// following components:
+///    - SOURCE instructions (also includes function arguments)
+///    - SINK instructions
+///    - Basic block entry points
+///    - Basic block terminators
+///    - LFENCE instructions
+/// 2. Analyze the GadgetGraph to determine which SOURCE+SINK pairs (i.e.,
+/// gadgets) are already mitigated by existing LFENCEs. If all gadgets have
+/// been mitigated, go to step 6.
+/// 3. Use a heuristic or plugin to approximate minimal LFENCE insertion.
+/// 4. Insert one LFENCE along each CFG edge that was cut in step 3.
+/// 5. Go to step 2.
+/// 6. If any LFENCEs were inserted, return `true` from runOnFunction() to tell
+/// LLVM that the function was modified.
+///
+/// For performance purposes, this pass uses a custom data structure to
+/// implement the GadgetGraph: ImmutableGraph. As the name suggests, an
+/// ImmutableGraph cannot be modified, except by creating a new ImmutableGraph.
+/// ImmutableGraph is implemented as two arrays: one containing nodes, and one
+/// containing edges. The advantages to this implementation are two-fold:
+/// 1. Iteration and traversal operations should experience terrific caching
+/// performance.
+/// 2. Set representations and operations on nodes and edges become
+/// extraordinarily efficient. For instance, a set of edges is implemented as
+/// an llvm::BitVector, wherein each bit corresponds to one edge in the edge
+/// array. This implies a lower bound of 64x spatial improvement over, e.g.,
+/// an llvm::DenseSet or llvm::SmallSet. It also means that
+/// insert/erase/contains operations complete in negligible constant time:
+/// insert and erase require one load and one store, and contains requires
+/// just one load.
+///
+//===----------------------------------------------------------------------===//
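For readers unfamiliar with LVI gadgets, the following hypothetical SOURCE-to-SINK pair illustrates what the description above is hunting for; the function and variable names are illustrative, not from the patch:

extern int Table[256];

int VictimLoad(int **PP) {
  int *P = *PP;     // SOURCE: a load whose result may be attacker-injected
                    // under LVI (e.g., via a faulting or assisted load).
  return Table[*P]; // SINK: the loaded value addresses a dependent load,
                    // forming a disclosure gadget.
}
// With -mlvi-hardening, an LFENCE is placed so that every CFG path from the
// SOURCE to the SINK crosses at least one fence, e.g. (sketch):
//   movq  (%rdi), %rax
//   lfence                ; inserted by this pass
//   movl  (%rax), %ecx    ; now consumes the architecturally correct value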
+
+#include "ImmutableGraph.h"
+#include "X86.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-load"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+                                 "were deployed");
+STATISTIC(NumGadgets, "Number of LVI gadgets detected during analysis");
+
+static cl::opt<std::string> OptimizePluginPath(
+    PASS_KEY "-opt-plugin",
+    cl::desc("Specify a plugin to optimize LFENCE insertion"), cl::Hidden);
+
+static cl::opt<bool> NoConditionalBranches(
+    PASS_KEY "-no-cbranch",
+    cl::desc("Don't treat conditional branches as disclosure gadgets. This "
+             "may improve performance, at the cost of security."),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDot(
+    PASS_KEY "-dot",
+    cl::desc(
+        "For each function, emit a dot graph depicting potential LVI gadgets"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotOnly(
+    PASS_KEY "-dot-only",
+    cl::desc("For each function, emit a dot graph depicting potential LVI "
+             "gadgets, and do not insert any fences"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> EmitDotVerify(
+    PASS_KEY "-dot-verify",
+    cl::desc("For each function, emit a dot graph to stdout depicting "
+             "potential LVI gadgets, used for testing purposes only"),
+    cl::init(false), cl::Hidden);
+
+static cl::opt<bool> NoFixedLoads(
+    PASS_KEY "-no-fixed",
+    cl::desc("Don't mitigate RIP-relative or RSP-relative loads. This "
+             "may improve performance, at the cost of security."),
+    cl::init(false), cl::Hidden);
+
+static llvm::sys::DynamicLibrary OptimizeDL{};
+typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size,
+                            unsigned int *edges, int *edge_values,
+                            int *cut_edges /* out */, unsigned int edges_size);
+static OptimizeCutT OptimizeCut = nullptr;
+
+#define ARG_NODE nullptr
+#define GADGET_EDGE ((int)(-1))
+#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1))
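The OptimizeCutT typedef above defines the interface an external cut-optimization plugin must satisfy. A toy plugin matching that signature is sketched below; the exported symbol name and the return-value convention are assumptions (the loader code that would name the symbol falls outside this excerpt). This version cuts every CFG edge, which is trivially sound (every gadget path receives a fence) but far from minimal; a real plugin would solve a min-cut-style problem over the edge weights:

extern "C" int optimize_cut(unsigned int *Nodes, unsigned int NodesSize,
                            unsigned int *Edges, int *EdgeValues,
                            int *CutEdges /* out */, unsigned int EdgesSize) {
  (void)Nodes;
  (void)NodesSize;
  (void)Edges;
  for (unsigned int I = 0; I < EdgesSize; ++I)
    CutEdges[I] = EdgeValues[I] >= 0; // CFG edges carry non-negative weights;
                                      // gadget edges are GADGET_EDGE (-1).
  return 0; // assumed success code
}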
This " + "may improve performance, at the cost of security."), + cl::init(false), cl::Hidden); + +static llvm::sys::DynamicLibrary OptimizeDL{}; +typedef int (*OptimizeCutT)(unsigned int *nodes, unsigned int nodes_size, + unsigned int *edges, int *edge_values, + int *cut_edges /* out */, unsigned int edges_size); +static OptimizeCutT OptimizeCut = nullptr; + +#define ARG_NODE nullptr +#define GADGET_EDGE ((int)(-1)) +#define WEIGHT(EdgeValue) ((double)(2 * (EdgeValue) + 1)) + +namespace { + +class X86LoadValueInjectionLoadHardeningPass : public MachineFunctionPass { +public: + X86LoadValueInjectionLoadHardeningPass() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { + return "X86 Load Value Injection (LVI) Load Hardening Pass"; + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + + static char ID; + +private: + struct MachineGadgetGraph : ImmutableGraph { + using GraphT = ImmutableGraph; + using Node = typename GraphT::Node; + using Edge = typename GraphT::Edge; + using size_type = typename GraphT::size_type; + MachineGadgetGraph(Node *Nodes, size_type NodesSize, Edge *Edges, + size_type EdgesSize, int NumFences = 0, + int NumGadgets = 0) + : GraphT{Nodes, NodesSize, Edges, EdgesSize}, NumFences{NumFences}, + NumGadgets{NumGadgets} {} + MachineFunction &getMF() { // FIXME: This function should be cleaner + for (Node *NI = nodes_begin(), *const NE = nodes_end(); NI != NE; ++NI) { + if (NI->value()) { + return *NI->value()->getMF(); + } + } + llvm_unreachable("Could not find a valid node"); + } + static inline bool isCFGEdge(Edge &E) { return E.value() != GADGET_EDGE; } + static inline bool isGadgetEdge(Edge &E) { + return E.value() == GADGET_EDGE; + } + int NumFences; + int NumGadgets; + }; + friend struct llvm::DOTGraphTraits; + using GTraits = llvm::GraphTraits; + using GraphBuilder = ImmutableGraphBuilder; + using EdgeSet = MachineGadgetGraph::EdgeSet; + using Gadget = std::pair; + + const X86Subtarget *STI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + int hardenLoads(MachineFunction &MF, bool Fixed) const; + std::unique_ptr + getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, + const MachineDominanceFrontier &MDF, bool FixedLoads) const; + std::unique_ptr + elimEdges(std::unique_ptr Graph) const; + void cutEdges(MachineGadgetGraph &G, EdgeSet &CutEdges /* out */) const; + int insertFences(MachineGadgetGraph &G, + EdgeSet &CutEdges /* in, out */) const; + + bool instrUsesRegToAccessMemory(const MachineInstr &I, unsigned Reg) const; + bool instrUsesRegToBranch(const MachineInstr &I, unsigned Reg) const; + template bool hasLoadFrom(const MachineInstr &MI) const; + bool instrAccessesStackSlot(const MachineInstr &MI) const; + bool instrAccessesConstantPool(const MachineInstr &MI) const; + bool instrAccessesGOT(const MachineInstr &MI) const; + inline bool instrIsFixedAccess(const MachineInstr &MI) const { + return instrAccessesConstantPool(MI) || instrAccessesStackSlot(MI) || + instrAccessesGOT(MI); + } + inline bool isFence(const MachineInstr *MI) const { + return MI && (MI->getOpcode() == X86::LFENCE || + (STI->useLVIControlFlowIntegrity() && MI->isCall())); + } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> +struct GraphTraits + : GraphTraits *> {}; + +template <> +struct DOTGraphTraits< + X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph *> + : DefaultDOTGraphTraits { + using GraphType = 
X86LoadValueInjectionLoadHardeningPass::MachineGadgetGraph; + using Traits = X86LoadValueInjectionLoadHardeningPass::GTraits; + using NodeRef = typename Traits::NodeRef; + using EdgeRef = typename Traits::EdgeRef; + using ChildIteratorType = typename Traits::ChildIteratorType; + using ChildEdgeIteratorType = typename Traits::ChildEdgeIteratorType; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(GraphType *G) { + std::string GraphName{"Speculative gadgets for \""}; + GraphName += G->getMF().getName(); + GraphName += "\" function"; + return GraphName; + } + + std::string getNodeLabel(NodeRef Node, GraphType *) { + std::string str; + raw_string_ostream str_stream{str}; + if (Node->value() == ARG_NODE) + return "ARGS"; + str_stream << *Node->value(); + return str_stream.str(); + } + + static std::string getNodeAttributes(NodeRef Node, GraphType *) { + MachineInstr *MI = Node->value(); + if (MI == ARG_NODE) + return "color = blue"; + else if (MI->getOpcode() == X86::LFENCE) + return "color = green"; + else + return ""; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType E, + GraphType *) { + int EdgeVal = (*E.getCurrent()).value(); + return EdgeVal >= 0 ? "label = " + std::to_string(EdgeVal) + : "color = red, style = \"dashed\""; + } +}; + +} // end namespace llvm + +char X86LoadValueInjectionLoadHardeningPass::ID = 0; + +void X86LoadValueInjectionLoadHardeningPass::getAnalysisUsage( + AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.setPreservesCFG(); +} + +bool X86LoadValueInjectionLoadHardeningPass::runOnMachineFunction( + MachineFunction &MF) { + STI = &MF.getSubtarget(); + if (!STI->useLVILoadHardening() || !(STI->hasSSE2() || STI->is64Bit())) + return false; // FIXME: support 32-bit + + // Don't skip functions with the "optnone" attr but participate in opt-bisect. + const Function &F = MF.getFunction(); + if (!F.hasOptNone() && skipFunction(F)) + return false; + + LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName() + << " *****\n"); + ++NumFunctionsConsidered; + TII = STI->getInstrInfo(); + TRI = STI->getRegisterInfo(); + LLVM_DEBUG(dbgs() << "Hardening data-dependent loads...\n"); + int FencesInserted = hardenLoads(MF, false); + LLVM_DEBUG(dbgs() << "Hardening data-dependent loads... Done\n"); + if (!NoFixedLoads) { + LLVM_DEBUG(dbgs() << "Hardening fixed loads...\n"); + hardenLoads(MF, true); + LLVM_DEBUG(dbgs() << "Hardening fixed loads... Done\n"); + } + if (FencesInserted > 0) + ++NumFunctionsMitigated; + NumFences += FencesInserted; + return (FencesInserted > 0); +} + +// Apply the mitigation to `MF`, return the number of fences inserted. +// If `FixedLoads` is `true`, then the mitigation will be applied to both fixed +// and non-fixed loads; otherwise, only non-fixed loads. +int X86LoadValueInjectionLoadHardeningPass::hardenLoads(MachineFunction &MF, + bool FixedLoads) const { + int FencesInserted = 0; + + LLVM_DEBUG(dbgs() << "Building gadget graph...\n"); + const auto &MLI = getAnalysis(); + const auto &MDT = getAnalysis(); + const auto &MDF = getAnalysis(); + std::unique_ptr Graph = + getGadgetGraph(MF, MLI, MDT, MDF, FixedLoads); + LLVM_DEBUG(dbgs() << "Building gadget graph... 
Done\n"); + if (Graph == nullptr) + return 0; // didn't find any gadgets + + if (EmitDotVerify) { + WriteGraph(outs(), Graph.get()); + return 0; + } + + if (EmitDot || EmitDotOnly) { + LLVM_DEBUG(dbgs() << "Emitting gadget graph...\n"); + std::error_code FileError; + std::string FileName = "lvi."; + if (FixedLoads) + FileName += "fixed."; + FileName += Graph->getMF().getName(); + FileName += ".dot"; + raw_fd_ostream FileOut(FileName, FileError); + if (FileError) + errs() << FileError.message(); + WriteGraph(FileOut, Graph.get()); + FileOut.close(); + LLVM_DEBUG(dbgs() << "Emitting gadget graph... Done\n"); + if (EmitDotOnly) + return 0; + } + + do { + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths...\n"); + std::unique_ptr ElimGraph = elimEdges(std::move(Graph)); + LLVM_DEBUG(dbgs() << "Eliminating mitigated paths... Done\n"); + if (ElimGraph->NumGadgets == 0) + break; + + EdgeSet CutEdges{*ElimGraph}; + LLVM_DEBUG(dbgs() << "Cutting edges...\n"); + cutEdges(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Cutting edges... Done\n"); + + LLVM_DEBUG(dbgs() << "Inserting LFENCEs...\n"); + FencesInserted += insertFences(*ElimGraph, CutEdges); + LLVM_DEBUG(dbgs() << "Inserting LFENCEs... Done\n"); + + Graph.reset(GraphBuilder::trim( + *ElimGraph, MachineGadgetGraph::NodeSet{*ElimGraph}, CutEdges)); + } while (true); + + return FencesInserted; +} + +std::unique_ptr +X86LoadValueInjectionLoadHardeningPass::getGadgetGraph( + MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF, + bool FixedLoads) const { + using namespace rdf; + + // Build the Register Dataflow Graph using the RDF framework + TargetOperandInfo TOI{*TII}; + DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI}; + DFG.build(); + Liveness L{MF.getRegInfo(), DFG}; + L.computePhiInfo(); + + GraphBuilder Builder; + using GraphIter = typename GraphBuilder::NodeRef; + DenseMap NodeMap; + int FenceCount = 0; + auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) { + auto Ref = NodeMap.find(MI); + if (Ref == NodeMap.end()) { + auto I = Builder.addVertex(MI); + NodeMap[MI] = I; + return std::pair{I, true}; + } else { + return std::pair{Ref->getSecond(), false}; + } + }; + + // Analyze all machine instructions to find gadgets and LFENCEs, adding + // each interesting value to `Nodes` + DenseSet> GadgetEdgeSet; + auto AnalyzeDef = [&](NodeAddr Def) { + MachineInstr *MI = Def.Addr->getFlags() & NodeAttrs::PhiRef + ? ARG_NODE + : Def.Addr->getOp().getParent(); + auto AnalyzeUse = [&](NodeAddr Use) { + assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef)); + MachineOperand &UseMO = Use.Addr->getOp(); + MachineInstr &UseMI = *UseMO.getParent(); + assert(UseMO.isReg()); + // We naively assume that an instruction propagates any loaded Uses + // to all Defs, unless the instruction is a call + if (UseMI.isCall()) + return false; + if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) || + (!NoConditionalBranches && + instrUsesRegToBranch(UseMI, UseMO.getReg()))) { // found a gadget! 
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
+    MachineFunction &MF, const MachineLoopInfo &MLI,
+    const MachineDominatorTree &MDT, const MachineDominanceFrontier &MDF,
+    bool FixedLoads) const {
+  using namespace rdf;
+
+  // Build the Register Dataflow Graph using the RDF framework
+  TargetOperandInfo TOI{*TII};
+  DataFlowGraph DFG{MF, *TII, *TRI, MDT, MDF, TOI};
+  DFG.build();
+  Liveness L{MF.getRegInfo(), DFG};
+  L.computePhiInfo();
+
+  GraphBuilder Builder;
+  using GraphIter = typename GraphBuilder::NodeRef;
+  DenseMap<MachineInstr *, GraphIter> NodeMap;
+  int FenceCount = 0;
+  auto MaybeAddNode = [&NodeMap, &Builder](MachineInstr *MI) {
+    auto Ref = NodeMap.find(MI);
+    if (Ref == NodeMap.end()) {
+      auto I = Builder.addVertex(MI);
+      NodeMap[MI] = I;
+      return std::pair<GraphIter, bool>{I, true};
+    } else {
+      return std::pair<GraphIter, bool>{Ref->getSecond(), false};
+    }
+  };
+
+  // Analyze all machine instructions to find gadgets and LFENCEs, adding
+  // each interesting value to `Nodes`
+  DenseSet<std::pair<GraphIter, GraphIter>> GadgetEdgeSet;
+  auto AnalyzeDef = [&](NodeAddr<DefNode *> Def) {
+    MachineInstr *MI = Def.Addr->getFlags() & NodeAttrs::PhiRef
+                           ? ARG_NODE
+                           : Def.Addr->getOp().getParent();
+    auto AnalyzeUse = [&](NodeAddr<UseNode *> Use) {
+      assert(!(Use.Addr->getFlags() & NodeAttrs::PhiRef));
+      MachineOperand &UseMO = Use.Addr->getOp();
+      MachineInstr &UseMI = *UseMO.getParent();
+      assert(UseMO.isReg());
+      // We naively assume that an instruction propagates any loaded Uses
+      // to all Defs, unless the instruction is a call
+      if (UseMI.isCall())
+        return false;
+      if (instrUsesRegToAccessMemory(UseMI, UseMO.getReg()) ||
+          (!NoConditionalBranches &&
+           instrUsesRegToBranch(UseMI, UseMO.getReg()))) { // found a gadget!
+        // add the root of this chain
+        auto GadgetBegin = MaybeAddNode(MI);
+        // and the instruction that (transitively) discloses the root
+        auto GadgetEnd = MaybeAddNode(&UseMI);
+        if (GadgetEdgeSet.insert({GadgetBegin.first, GadgetEnd.first}).second)
+          Builder.addEdge(GADGET_EDGE, GadgetBegin.first, GadgetEnd.first);
+        if (UseMI.mayLoad()) // FIXME: This should be more precise
+          return false;      // stop traversing further uses of `Reg`
+      }
+      return true;
+    };
+    SmallSet<NodeId, 8> NodesVisited;
+    std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
+        [&](NodeAddr<DefNode *> Def) {
+          if (Def.Addr->getAttrs() & NodeAttrs::Dead)
+            return;
+          RegisterRef DefReg = DFG.getPRI().normalize(Def.Addr->getRegRef(DFG));
+          NodeList Uses;
+          for (auto UseID : L.getAllReachedUses(DefReg, Def)) {
+            auto Use = DFG.addr<UseNode *>(UseID);
+            if (Use.Addr->getFlags() & NodeAttrs::PhiRef) { // phi node
+              NodeAddr<PhiNode *> Phi = Use.Addr->getOwner(DFG);
+              for (auto I : L.getRealUses(Phi.Id)) {
+                if (DFG.getPRI().alias(RegisterRef(I.first), DefReg)) {
+                  for (auto UA : I.second) {
+                    auto PhiUse = DFG.addr<UseNode *>(UA.first);
+                    Uses.push_back(PhiUse);
+                  }
+                }
+              }
+            } else { // not a phi node
+              Uses.push_back(Use);
+            }
+          }
+          for (auto N : Uses) {
+            NodeAddr<UseNode *> Use{N};
+            if (NodesVisited.insert(Use.Id).second && AnalyzeUse(Use)) {
+              NodeAddr<InstrNode *> Owner{Use.Addr->getOwner(DFG)};
+              NodeList Defs = Owner.Addr->members_if(DataFlowGraph::IsDef, DFG);
+              std::for_each(Defs.begin(), Defs.end(), AnalyzeDefUseChain);
+            }
+          }
+        };
+    AnalyzeDefUseChain(Def);
+  };
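+
+  // A sketch of the traversal above, in pseudo-IR (names illustrative):
+  //
+  //   %a = load %untrusted_ptr   ; AnalyzeDef starts at this def
+  //   %b = add %a, 1             ; reached use; its defs are traversed too
+  //   %c = load %b               ; use that addresses memory: gadget found
+  //
+  // AnalyzeDefUseChain walks from the def of %a through %b and records a
+  // gadget edge from the load of %a to the load of %c.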
+
+  LLVM_DEBUG(dbgs() << "Analyzing def-use chains to find gadgets\n");
+  // Analyze function arguments
+  if (!FixedLoads) { // only need to analyze function args once
+    NodeAddr<BlockNode *> EntryBlock = DFG.getFunc().Addr->getEntryBlock(DFG);
+    for (NodeAddr<PhiNode *> ArgPhi :
+         EntryBlock.Addr->members_if(DataFlowGraph::IsPhi, DFG)) {
+      NodeList Defs = ArgPhi.Addr->members_if(DataFlowGraph::IsDef, DFG);
+      std::for_each(Defs.begin(), Defs.end(), AnalyzeDef);
+    }
+  }
+  // Analyze every instruction in MF
+  for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
+    for (NodeAddr<StmtNode *> SA :
+         BA.Addr->members_if(DataFlowGraph::IsCode, DFG)) {
+      MachineInstr *MI = SA.Addr->getCode();
+      if (isFence(MI)) {
+        MaybeAddNode(MI);
+        ++FenceCount;
+      } else if (MI->mayLoad() && ((FixedLoads && instrIsFixedAccess(*MI)) ||
+                                   (!FixedLoads && !instrIsFixedAccess(*MI)))) {
+        NodeList Defs = SA.Addr->members_if(DataFlowGraph::IsDef, DFG);
+        std::for_each(Defs.begin(), Defs.end(), AnalyzeDef);
+      }
+    }
+  }
+  int GadgetCount = static_cast<int>(GadgetEdgeSet.size());
+  LLVM_DEBUG(dbgs() << "Found " << FenceCount << " fences\n");
+  LLVM_DEBUG(dbgs() << "Found " << GadgetCount << " gadgets\n");
+  if (GadgetCount == 0)
+    return nullptr;
+  NumGadgets += GadgetCount;
+
+  // Traverse CFG to build the rest of the graph
+  SmallSet<MachineBasicBlock *, 2> BlocksVisited;
+  std::function<void(MachineBasicBlock *, GraphIter, unsigned)> TraverseCFG =
+      [&](MachineBasicBlock *MBB, GraphIter GI, unsigned ParentDepth) {
+        unsigned LoopDepth = MLI.getLoopDepth(MBB);
+        if (!MBB->empty()) {
+          // Always add the first instruction in each block
+          auto NI = MBB->begin();
+          auto BeginBB = MaybeAddNode(&*NI);
+          Builder.addEdge(ParentDepth, GI, BeginBB.first);
+          if (!BlocksVisited.insert(MBB).second)
+            return;
+
+          // Add any instructions within the block that are gadget components
+          GI = BeginBB.first;
+          while (++NI != MBB->end()) {
+            auto Ref = NodeMap.find(&*NI);
+            if (Ref != NodeMap.end()) {
+              Builder.addEdge(LoopDepth, GI, Ref->getSecond());
+              GI = Ref->getSecond();
+            }
+          }
+
+          // Always add the terminator instruction, if one exists
+          auto T = MBB->getFirstTerminator();
+          if (T != MBB->end()) {
+            auto EndBB = MaybeAddNode(&*T);
+            if (EndBB.second)
+              Builder.addEdge(LoopDepth, GI, EndBB.first);
+            GI = EndBB.first;
+          }
+        }
+        for (MachineBasicBlock *Succ : MBB->successors())
+          TraverseCFG(Succ, GI, LoopDepth);
+      };
+  // ARG_NODE is a pseudo-instruction that represents MF args in the GadgetGraph
+  GraphIter ArgNode = MaybeAddNode(ARG_NODE).first;
+  TraverseCFG(&MF.front(), ArgNode, 0);
+  std::unique_ptr<MachineGadgetGraph> G{Builder.get(FenceCount, GadgetCount)};
+  LLVM_DEBUG(dbgs() << "Found " << GTraits::size(G.get()) << " nodes\n");
+  return G;
+}
+
+std::unique_ptr<MachineGadgetGraph>
+X86LoadValueInjectionLoadHardeningPass::elimEdges(
+    std::unique_ptr<MachineGadgetGraph> Graph) const {
+  MachineGadgetGraph::NodeSet ElimNodes{*Graph};
+  MachineGadgetGraph::EdgeSet ElimEdges{*Graph};
+
+  if (Graph->NumFences > 0) { // eliminate fences
+    for (auto EI = Graph->edges_begin(), EE = Graph->edges_end(); EI != EE;
+         ++EI) {
+      GTraits::NodeRef Dest = GTraits::edge_dest(*EI);
+      if (isFence(Dest->value())) {
+        ElimNodes.insert(Dest);
+        ElimEdges.insert(EI);
+        std::for_each(
+            GTraits::child_edge_begin(Dest), GTraits::child_edge_end(Dest),
+            [&ElimEdges](GTraits::EdgeRef E) { ElimEdges.insert(&E); });
+      }
+    }
+    LLVM_DEBUG(dbgs() << "Eliminated " << ElimNodes.count()
+                      << " fence nodes\n");
+  }
+
+  // eliminate gadget edges that are mitigated
+  int NumGadgets = 0;
+  MachineGadgetGraph::NodeSet Visited{*Graph}, GadgetSinks{*Graph};
+  MachineGadgetGraph::EdgeSet ElimGadgets{*Graph};
+  for (auto NI = GTraits::nodes_begin(Graph.get()),
+            NE = GTraits::nodes_end(Graph.get());
+       NI != NE; ++NI) {
+    // collect the gadgets for this node
+    for (auto EI = GTraits::child_edge_begin(*NI),
+              EE = GTraits::child_edge_end(*NI);
+         EI != EE; ++EI) {
+      if (MachineGadgetGraph::isGadgetEdge(*EI)) {
+        ++NumGadgets;
+        ElimGadgets.insert(EI);
+        GadgetSinks.insert(GTraits::edge_dest(*EI));
+      }
+    }
+    if (GadgetSinks.empty())
+      continue;
+    std::function<void(GTraits::NodeRef, bool)> TraverseDFS =
+        [&](GTraits::NodeRef N, bool FirstNode) {
+          if (!FirstNode) {
+            Visited.insert(N);
+            if (GadgetSinks.contains(N)) {
+              for (auto CEI = GTraits::child_edge_begin(*NI),
+                        CEE = GTraits::child_edge_end(*NI);
+                   CEI != CEE; ++CEI) {
+                if (MachineGadgetGraph::isGadgetEdge(*CEI) &&
+                    GTraits::edge_dest(*CEI) == N)
+                  ElimGadgets.erase(CEI);
+              }
+            }
+          }
+          for (auto CEI = GTraits::child_edge_begin(N),
+                    CEE = GTraits::child_edge_end(N);
+               CEI != CEE; ++CEI) {
+            GTraits::NodeRef Dest = GTraits::edge_dest(*CEI);
+            if (MachineGadgetGraph::isCFGEdge(*CEI) &&
+                !Visited.contains(Dest) && !ElimEdges.contains(CEI))
+              TraverseDFS(Dest, false);
+          }
+        };
+    TraverseDFS(*NI, true);
+    Visited.clear();
+    GadgetSinks.clear();
+  }
+  LLVM_DEBUG(dbgs() << "Eliminated " << ElimGadgets.count()
+                    << " gadget edges\n");
+  ElimEdges |= ElimGadgets;
+
+  if (!(ElimEdges.empty() && ElimNodes.empty())) {
+    int NumRemainingGadgets = NumGadgets - ElimGadgets.count();
+    Graph.reset(GraphBuilder::trim(*Graph, ElimNodes, ElimEdges,
+                                   0 /* NumFences */, NumRemainingGadgets));
+  } else {
+    Graph->NumFences = 0;
+    Graph->NumGadgets = NumGadgets;
+  }
+  return Graph;
+}
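+
+// The edge-cutting step below can defer to an external solver loaded from
+// OptimizePluginPath, which must export an `optimize_cut` symbol compatible
+// with OptimizeCutT. For illustration only, a minimal, maximally conservative
+// plugin that cuts every edge might look like this (parameter order inferred
+// from the call site below; the return value is assumed and is ignored here):
+//
+//   extern "C" int optimize_cut(unsigned int *Nodes, unsigned int NodesSize,
+//                               unsigned int *Edges, int *EdgeValues,
+//                               int *EdgeCuts, unsigned int EdgesSize) {
+//     for (unsigned int I = 0; I < EdgesSize; ++I)
+//       EdgeCuts[I] = 1; // a real plugin would solve for a cheaper cut
+//     return 0;
+//   }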
+
+void X86LoadValueInjectionLoadHardeningPass::cutEdges(
+    MachineGadgetGraph &G,
+    MachineGadgetGraph::EdgeSet &CutEdges /* out */) const {
+  if (!OptimizePluginPath.empty()) {
+    if (!OptimizeDL.isValid()) {
+      std::string ErrorMsg{};
+      OptimizeDL = llvm::sys::DynamicLibrary::getPermanentLibrary(
+          OptimizePluginPath.c_str(), &ErrorMsg);
+      if (!ErrorMsg.empty())
+        report_fatal_error("Failed to load opt plugin: \"" + ErrorMsg + '\"');
+      OptimizeCut = (OptimizeCutT)OptimizeDL.getAddressOfSymbol("optimize_cut");
+      if (!OptimizeCut)
+        report_fatal_error("Invalid optimization plugin");
+    }
+    auto *Nodes = new unsigned int[G.nodes_size() + 1 /* terminator node */];
+    auto *Edges = new unsigned int[G.edges_size()];
+    auto *EdgeCuts = new int[G.edges_size()];
+    auto *EdgeValues = new int[G.edges_size()];
+    for (auto *NI = G.nodes_begin(), *NE = G.nodes_end(); NI != NE; ++NI) {
+      Nodes[std::distance(G.nodes_begin(), NI)] =
+          std::distance(G.edges_begin(), GTraits::child_edge_begin(NI));
+    }
+    Nodes[G.nodes_size()] = G.edges_size(); // terminator node
+    for (auto *EI = G.edges_begin(), *EE = G.edges_end(); EI != EE; ++EI) {
+      Edges[std::distance(G.edges_begin(), EI)] =
+          std::distance(G.nodes_begin(), GTraits::edge_dest(*EI));
+      EdgeValues[std::distance(G.edges_begin(), EI)] = EI->value();
+    }
+    OptimizeCut(Nodes, G.nodes_size(), Edges, EdgeValues, EdgeCuts,
+                G.edges_size());
+    for (int I = 0; I < G.edges_size(); ++I) {
+      if (EdgeCuts[I])
+        CutEdges.set(I);
+    }
+    delete[] Nodes;
+    delete[] Edges;
+    delete[] EdgeCuts;
+    delete[] EdgeValues;
+  } else { // Use the default greedy heuristic
+    // Find the cheapest CFG edge that will eliminate a gadget (by being egress
+    // from a SOURCE node or ingress to a SINK node), and cut it.
+    MachineGadgetGraph::NodeSet GadgetSinks{G};
+    MachineGadgetGraph::Edge *CheapestSoFar = nullptr;
+    for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
+         NI != NE; ++NI) {
+      for (auto EI = GTraits::child_edge_begin(*NI),
+                EE = GTraits::child_edge_end(*NI);
+           EI != EE; ++EI) {
+        if (MachineGadgetGraph::isGadgetEdge(*EI)) {
+          // NI is a SOURCE node. Look for a cheap egress edge
+          for (auto EEI = GTraits::child_edge_begin(*NI); EEI != EE; ++EEI) {
+            if (MachineGadgetGraph::isCFGEdge(*EEI)) {
+              if (!CheapestSoFar || EEI->value() < CheapestSoFar->value())
+                CheapestSoFar = EEI;
+            }
+          }
+          GadgetSinks.insert(GTraits::edge_dest(*EI));
+        } else { // EI is a CFG edge
+          if (GadgetSinks.contains(GTraits::edge_dest(*EI))) {
+            // The dest is a SINK node. Hence EI is an ingress edge
+            if (!CheapestSoFar || EI->value() < CheapestSoFar->value())
+              CheapestSoFar = EI;
+          }
+        }
+      }
+    }
+    assert(CheapestSoFar && "Failed to cut an edge");
+    CutEdges.insert(CheapestSoFar);
+  }
+  LLVM_DEBUG(dbgs() << "Cut " << CutEdges.count() << " edges\n");
+}
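+
+// Fence placement for a cut edge depends on the edge's source instruction,
+// as implemented below. An illustrative sketch:
+//
+//   movq (%rax), %rbx   # cut edge leaves a load: LFENCE goes after it
+//   lfence
+//   ...
+//   lfence              # cut edge leaves a branch: LFENCE goes before it,
+//   je target           # and all remaining CFG edges out of it are cut too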
+
+int X86LoadValueInjectionLoadHardeningPass::insertFences(
+    MachineGadgetGraph &G, EdgeSet &CutEdges /* in, out */) const {
+  int FencesInserted = 0, AdditionalEdgesCut = 0;
+  auto CutAllCFGEdges = [&CutEdges, &AdditionalEdgesCut](GTraits::NodeRef N) {
+    for (auto CEI = GTraits::child_edge_begin(N),
+              CEE = GTraits::child_edge_end(N);
+         CEI != CEE; ++CEI) {
+      if (MachineGadgetGraph::isCFGEdge(*CEI) && !CutEdges.contains(CEI)) {
+        CutEdges.insert(CEI);
+        ++AdditionalEdgesCut;
+      }
+    }
+  };
+  for (auto NI = GTraits::nodes_begin(&G), NE = GTraits::nodes_end(&G);
+       NI != NE; ++NI) {
+    for (auto CEI = GTraits::child_edge_begin(*NI),
+              CEE = GTraits::child_edge_end(*NI);
+         CEI != CEE; ++CEI) {
+      if (CutEdges.contains(CEI)) {
+        MachineInstr *MI = (*NI)->value(), *Prev;
+        MachineBasicBlock *MBB;
+        MachineBasicBlock::iterator InsertionPt;
+        if (MI == ARG_NODE) { // insert LFENCE at beginning of entry block
+          MBB = &G.getMF().front();
+          InsertionPt = MBB->begin();
+          Prev = nullptr;
+        } else if (MI->isBranch()) { // insert the LFENCE before the branch
+          MBB = MI->getParent();
+          InsertionPt = MI;
+          Prev = MI->getPrevNode();
+          CutAllCFGEdges(*NI);
+        } else { // insert the LFENCE after the instruction
+          MBB = MI->getParent();
+          InsertionPt = MI->getNextNode() ? MI->getNextNode() : MBB->end();
+          Prev = InsertionPt == MBB->end()
+                     ? (MBB->empty() ? nullptr : &MBB->back())
+                     : InsertionPt->getPrevNode();
+        }
+        if ((InsertionPt == MBB->end() || !isFence(&*InsertionPt)) &&
+            (!Prev || !isFence(Prev))) {
+          BuildMI(*MBB, InsertionPt, DebugLoc(), TII->get(X86::LFENCE));
+          ++FencesInserted;
+        }
+      }
+    }
+  }
+  LLVM_DEBUG(dbgs() << "Inserted " << FencesInserted << " fences\n");
+  LLVM_DEBUG(dbgs() << "Cut an additional " << AdditionalEdgesCut
+                    << " edges during fence insertion\n");
+  return FencesInserted;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToAccessMemory(
+    const MachineInstr &MI, unsigned Reg) const {
+  if (!MI.mayLoadOrStore() || MI.getOpcode() == X86::MFENCE ||
+      MI.getOpcode() == X86::SFENCE || MI.getOpcode() == X86::LFENCE)
+    return false;
+
+  // FIXME: This does not handle pseudo loading instructions like TCRETURN*
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0) {
+    LLVM_DEBUG(dbgs() << "Warning: unable to obtain memory operand for loading "
+                         "instruction:\n";
+               MI.print(dbgs()); dbgs() << '\n';);
+    return false;
+  }
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+
+  const MachineOperand &BaseMO =
+      MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
+  const MachineOperand &IndexMO =
+      MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
+  return (BaseMO.isReg() && BaseMO.getReg() != X86::NoRegister &&
+          TRI->regsOverlap(BaseMO.getReg(), Reg)) ||
+         (IndexMO.isReg() && IndexMO.getReg() != X86::NoRegister &&
+          TRI->regsOverlap(IndexMO.getReg(), Reg));
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrUsesRegToBranch(
+    const MachineInstr &MI, unsigned Reg) const {
+  if (!MI.isConditionalBranch())
+    return false;
+  for (const MachineOperand &Use : MI.uses())
+    if (Use.isReg() && Use.getReg() == Reg)
+      return true;
+  return false;
+}
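+
+// Note: the X86::Addr* offsets used above and below index into the operand
+// 5-tuple that encodes an x86 memory reference. For example, in the
+// (illustrative) instruction
+//
+//   movq 16(%rdi,%rcx,8), %rax
+//
+// the tuple is AddrBaseReg = %rdi, AddrScaleAmt = 8, AddrIndexReg = %rcx,
+// AddrDisp = 16, plus a segment register operand.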
+
+template <unsigned K>
+bool X86LoadValueInjectionLoadHardeningPass::hasLoadFrom(
+    const MachineInstr &MI) const {
+  for (auto &MMO : MI.memoperands()) {
+    const PseudoSourceValue *PSV = MMO->getPseudoValue();
+    if (PSV && PSV->kind() == K && MMO->isLoad())
+      return true;
+  }
+  return false;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrAccessesStackSlot(
+    const MachineInstr &MI) const {
+  // Check the PSV first
+  if (hasLoadFrom<PseudoSourceValue::FixedStack>(MI))
+    return true;
+  // Some loads are not marked with a PSV, so we always need to double check
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0)
+    return false;
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+  return MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).isFI() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).getImm() == 1 &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).getReg() ==
+             X86::NoRegister &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).getImm() == 0;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrAccessesConstantPool(
+    const MachineInstr &MI) const {
+  if (hasLoadFrom<PseudoSourceValue::ConstantPool>(MI))
+    return true;
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0)
+    return false;
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+  return MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).isCPI() &&
+         (MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).getReg() ==
+              X86::RIP ||
+          MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).getReg() ==
+              X86::NoRegister) &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).getImm() == 1 &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).getReg() ==
+             X86::NoRegister;
+}
+
+bool X86LoadValueInjectionLoadHardeningPass::instrAccessesGOT(
+    const MachineInstr &MI) const {
+  if (hasLoadFrom<PseudoSourceValue::GOT>(MI))
+    return true;
+  const MCInstrDesc &Desc = MI.getDesc();
+  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
+  if (MemRefBeginIdx < 0)
+    return false;
+  MemRefBeginIdx += X86II::getOperandBias(Desc);
+  return MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).isImm() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).isReg() &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrDisp).getTargetFlags() ==
+             X86II::MO_GOTPCREL &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg).getReg() ==
+             X86::RIP &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrScaleAmt).getImm() == 1 &&
+         MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg).getReg() ==
+             X86::NoRegister;
+}
+
+INITIALIZE_PASS_BEGIN(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+                      "X86 LVI load hardening", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(X86LoadValueInjectionLoadHardeningPass, PASS_KEY,
+                    "X86 LVI load hardening", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionLoadHardeningPass() {
+  return new X86LoadValueInjectionLoadHardeningPass();
+}
Index: llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp
@@ -0,0 +1,141 @@
+//===-- X86LoadValueInjectionRetHardening.cpp - LVI RET hardening for x86 --==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// Description: Replaces every `ret` instruction with the sequence:
+/// ```
+/// pop <scratch-reg>
+/// lfence
+/// jmp *<scratch-reg>
+/// ```
+/// where `<scratch-reg>` is some available scratch register, according to the
+/// calling convention of the function being mitigated.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include <bitset>
+
+using namespace llvm;
+
+#define PASS_KEY "x86-lvi-ret"
+#define DEBUG_TYPE PASS_KEY
+
+STATISTIC(NumFences, "Number of LFENCEs inserted for LVI mitigation");
+STATISTIC(NumFunctionsConsidered, "Number of functions analyzed");
+STATISTIC(NumFunctionsMitigated, "Number of functions for which mitigations "
+                                 "were deployed");
+
+namespace {
+
+class X86LoadValueInjectionRetHardeningPass : public MachineFunctionPass {
+public:
+  X86LoadValueInjectionRetHardeningPass() : MachineFunctionPass(ID) {}
+  StringRef getPassName() const override {
+    return "X86 Load Value Injection (LVI) Ret-Hardening Pass";
+  }
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  static char ID;
+};
+
+} // end anonymous namespace
+
+char X86LoadValueInjectionRetHardeningPass::ID = 0;
+
+bool X86LoadValueInjectionRetHardeningPass::runOnMachineFunction(
+    MachineFunction &MF) {
+  const X86Subtarget *Subtarget = &MF.getSubtarget<X86Subtarget>();
+  if (!Subtarget->useLVIControlFlowIntegrity() ||
+      !(Subtarget->hasSSE2() || Subtarget->is64Bit()))
+    return false; // FIXME: support 32-bit
+
+  // Don't skip functions with the "optnone" attribute, but do participate in
+  // opt-bisect.
+  const Function &F = MF.getFunction();
+  if (!F.hasOptNone() && skipFunction(F))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "***** " << getPassName() << " : " << MF.getName()
+                    << " *****\n");
+  ++NumFunctionsConsidered;
+  const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  unsigned ClobberReg = X86::NoRegister;
+  std::bitset<X86::NUM_TARGET_REGS> UnclobberableGR64s;
+  UnclobberableGR64s.set(X86::RSP); // can't clobber stack pointer
+  UnclobberableGR64s.set(X86::RIP); // can't clobber instruction pointer
+  UnclobberableGR64s.set(X86::RAX); // used for function return
+  UnclobberableGR64s.set(X86::RDX); // used for function return
+
+  // We can clobber any register allowed by the function's calling convention.
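+  // For example, under the SysV x86-64 convention this typically selects a
+  // caller-saved register such as RCX, while for preserve_mostcc and
+  // preserve_allcc functions nearly every GR64 is callee-saved and R11 is
+  // chosen (these choices are illustrative; the loop below simply takes the
+  // first GR64 not marked unclobberable).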
+  for (const MCPhysReg *PR = TRI->getCalleeSavedRegs(&MF); auto Reg = *PR; ++PR)
+    UnclobberableGR64s.set(Reg);
+  for (auto &Reg : X86::GR64RegClass) {
+    if (!UnclobberableGR64s.test(Reg)) {
+      ClobberReg = Reg;
+      break;
+    }
+  }
+
+  if (ClobberReg != X86::NoRegister) {
+    LLVM_DEBUG(dbgs() << "Selected register "
+                      << Subtarget->getRegisterInfo()->getRegAsmName(ClobberReg)
+                      << " to clobber\n");
+  } else {
+    LLVM_DEBUG(dbgs() << "Could not find a register to clobber\n");
+  }
+
+  bool Modified = false;
+  for (auto &MBB : MF) {
+    if (MBB.empty()) // guard against blocks with no instructions
+      continue;
+    MachineInstr &MI = MBB.back();
+    if (MI.getOpcode() != X86::RETQ)
+      continue;
+
+    if (ClobberReg != X86::NoRegister) {
+      MBB.erase_instr(&MI);
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::POP64r))
+          .addReg(ClobberReg, RegState::Define)
+          .setMIFlag(MachineInstr::FrameDestroy);
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::LFENCE));
+      BuildMI(MBB, MBB.end(), DebugLoc(), TII->get(X86::JMP64r))
+          .addReg(ClobberReg);
+    } else {
+      // In case there is no available scratch register, we can still read
+      // from RSP to assert that RSP points to a valid page. The write to RSP
+      // is also helpful because it verifies that the stack's write
+      // permissions are intact.
+      MachineInstr *Fence = BuildMI(MBB, MI, DebugLoc(), TII->get(X86::LFENCE));
+      addRegOffset(BuildMI(MBB, Fence, DebugLoc(), TII->get(X86::SHL64mi)),
+                   X86::RSP, false, 0)
+          .addImm(0)
+          ->addRegisterDead(X86::EFLAGS, TRI);
+    }
+
+    ++NumFences;
+    Modified = true;
+  }
+
+  if (Modified)
+    ++NumFunctionsMitigated;
+  return Modified;
+}
+
+INITIALIZE_PASS(X86LoadValueInjectionRetHardeningPass, PASS_KEY,
+                "X86 LVI ret hardener", false, false)
+
+FunctionPass *llvm::createX86LoadValueInjectionRetHardeningPass() {
+  return new X86LoadValueInjectionRetHardeningPass();
+}
Index: llvm/lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- llvm/lib/Target/X86/X86MCInstLower.cpp
+++ llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -1223,7 +1223,8 @@
     break;
   case MachineOperand::MO_Register:
     // FIXME: Add retpoline support and remove this.
-    if (Subtarget->useRetpolineIndirectCalls())
+    if (Subtarget->useRetpolineIndirectCalls() ||
+        Subtarget->useLVIControlFlowIntegrity())
       report_fatal_error("Lowering register statepoints with retpoline not "
                          "yet implemented.");
     CallTargetMCOp = MCOperand::createReg(CallTarget.getReg());
@@ -1402,7 +1403,8 @@
   EmitAndCountInstruction(
       MCInstBuilder(X86::MOV64ri).addReg(ScratchReg).addOperand(CalleeMCOp));
   // FIXME: Add retpoline support and remove this.
-  if (Subtarget->useRetpolineIndirectCalls())
+  if (Subtarget->useRetpolineIndirectCalls() ||
+      Subtarget->useLVIControlFlowIntegrity())
     report_fatal_error(
         "Lowering patchpoint with retpoline not yet implemented.");
   EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg));
Index: llvm/lib/Target/X86/X86Subtarget.h
===================================================================
--- llvm/lib/Target/X86/X86Subtarget.h
+++ llvm/lib/Target/X86/X86Subtarget.h
@@ -421,6 +421,16 @@
   /// than emitting one inside the compiler.
   bool UseRetpolineExternalThunk = false;
 
+  /// Prevent generation of indirect call/branch instructions from memory,
+  /// and force all indirect call/branch instructions from a register to be
+  /// preceded by an LFENCE. Also decompose RET instructions into a
+  /// POP+LFENCE+JMP sequence.
+  bool UseLVIControlFlowIntegrity = false;
+
+  /// Insert LFENCE instructions to prevent data speculatively injected into
+  /// loads from being used maliciously.
+  bool UseLVILoadHardening = false;
+
   /// Use software floating point for code generation.
   bool UseSoftFloat = false;
 
@@ -709,6 +719,8 @@
   bool useRetpolineExternalThunk() const { return UseRetpolineExternalThunk; }
   bool preferMaskRegisters() const { return PreferMaskRegisters; }
   bool useGLMDivSqrtCosts() const { return UseGLMDivSqrtCosts; }
+  bool useLVIControlFlowIntegrity() const { return UseLVIControlFlowIntegrity; }
+  bool useLVILoadHardening() const { return UseLVILoadHardening; }
   unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
   unsigned getRequiredVectorWidth() const { return RequiredVectorWidth; }
@@ -856,7 +868,7 @@
   /// If we are using retpolines, we need to expand indirectbr to avoid it
   /// lowering to an actual indirect jump.
   bool enableIndirectBrExpand() const override {
-    return useRetpolineIndirectBranches();
+    return useRetpolineIndirectBranches() || useLVIControlFlowIntegrity();
   }
 
   /// Enable the MachineScheduler pass for all X86 subtargets.
Index: llvm/lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetMachine.cpp
+++ llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -82,6 +82,9 @@
   initializeX86SpeculativeLoadHardeningPassPass(PR);
   initializeX86FlagsCopyLoweringPassPass(PR);
   initializeX86CondBrFoldingPassPass(PR);
+  initializeX86LoadValueInjectionIndirectThunksPassPass(PR);
+  initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
+  initializeX86LoadValueInjectionRetHardeningPassPass(PR);
   initializeX86OptimizeLEAPassPass(PR);
 }
@@ -496,6 +499,7 @@
 void X86PassConfig::addPostRegAlloc() {
   addPass(createX86FloatingPointStackifierPass());
+  addPass(createX86LoadValueInjectionLoadHardeningPass());
 }
 
 void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
@@ -527,6 +531,7 @@
   const MCAsmInfo *MAI = TM->getMCAsmInfo();
 
   addPass(createX86RetpolineThunksPass());
+  addPass(createX86LoadValueInjectionIndirectThunksPass());
 
   // Insert extra int3 instructions after trailing call instructions to avoid
   // issues in the unwinder.
@@ -543,6 +548,7 @@
   // Identify valid longjmp targets for Windows Control Flow Guard.
   if (TT.isOSWindows())
     addPass(createCFGuardLongjmpPass());
+  addPass(createX86LoadValueInjectionRetHardeningPass());
 }
 
 std::unique_ptr<CSEConfigBase> X86PassConfig::getCSEConfig() const {
Index: llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1215,7 +1215,8 @@
     // Jump tables are only profitable if the retpoline mitigation is enabled.
     Attribute FSAttr = CS.getCaller()->getFnAttribute("target-features");
     if (FSAttr.hasAttribute(Attribute::None) ||
-        !FSAttr.getValueAsString().contains("+retpoline"))
+        !(FSAttr.getValueAsString().contains("+retpoline") ||
+          FSAttr.getValueAsString().contains("+lvi-cfi")))
       continue;
 
     if (RemarksEnabled)
Index: llvm/test/CodeGen/X86/O0-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O0-pipeline.ll
+++ llvm/test/CodeGen/X86/O0-pipeline.ll
@@ -55,6 +55,10 @@
 ; CHECK-NEXT: Fast Register Allocator
 ; CHECK-NEXT: Bundle Machine CFG Edges
 ; CHECK-NEXT: X86 FP Stackifier
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Natural Loop Construction
+; CHECK-NEXT: Machine Dominance Frontier Construction
+; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening Pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: Prologue/Epilogue Insertion & Frame Finalization
@@ -73,7 +77,9 @@
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
 ; CHECK-NEXT: X86 Retpoline Thunks
+; CHECK-NEXT: X86 Load Value Injection (LVI) Indirect Thunks Pass
 ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
+; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening Pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: X86 Assembly Printer
Index: llvm/test/CodeGen/X86/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O3-pipeline.ll
+++ llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -138,9 +138,11 @@
 ; CHECK-NEXT: Machine Loop Invariant Code Motion
 ; CHECK-NEXT: Bundle Machine CFG Edges
 ; CHECK-NEXT: X86 FP Stackifier
+; CHECK-NEXT: MachineDominator Tree Construction
+; CHECK-NEXT: Machine Dominance Frontier Construction
+; CHECK-NEXT: X86 Load Value Injection (LVI) Load Hardening Pass
 ; CHECK-NEXT: PostRA Machine Sink
 ; CHECK-NEXT: Machine Block Frequency Analysis
-; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: MachinePostDominator Tree Construction
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
@@ -182,7 +184,9 @@
 ; CHECK-NEXT: StackMap Liveness Analysis
 ; CHECK-NEXT: Live DEBUG_VALUE analysis
 ; CHECK-NEXT: X86 Retpoline Thunks
+; CHECK-NEXT: X86 Load Value Injection (LVI) Indirect Thunks Pass
 ; CHECK-NEXT: Check CFA info and insert CFI instructions if needed
+; CHECK-NEXT: X86 Load Value Injection (LVI) Ret-Hardening Pass
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Optimization Remark Emitter
 ; CHECK-NEXT: X86 Assembly Printer
Index: llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-gadget-graph.ll
@@ -0,0 +1,129 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -x86-lvi-load-dot-verify -o %t < %s | FileCheck %s
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @test(i32* %untrusted_user_ptr, i32* %secret, i32 %secret_size) #0 {
+entry:
+  %untrusted_user_ptr.addr = alloca i32*, align 8
+  %secret.addr = alloca i32*, align 8
+  %secret_size.addr = alloca i32, align 4
+  %ret_val = alloca i32, align 4
+  %i = alloca i32, align 4
+  store i32* %untrusted_user_ptr, i32** %untrusted_user_ptr.addr, align 8
+  store i32* %secret, i32** %secret.addr, align 8
+  store i32 %secret_size, i32* %secret_size.addr, align 4
+  store i32 0, i32* %ret_val, align 4
+  call void @llvm.x86.sse2.lfence()
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %1 = load i32, i32* %secret_size.addr, align 4
+  %cmp = icmp slt i32 %0, %1
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %2 = load i32, i32* %i, align 4
+  %rem = srem i32 %2, 2
+  %cmp1 = icmp eq i32 %rem, 0
+  br i1 %cmp1, label %if.then, label %if.else
+
+if.then:                                          ; preds = %for.body
+  %3 = load i32*, i32** %secret.addr, align 8
+  %4 = load i32, i32* %ret_val, align 4
+  %idxprom = sext i32 %4 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %3, i64 %idxprom
+  %5 = load i32, i32* %arrayidx, align 4
+  %6 = load i32*, i32** %untrusted_user_ptr.addr, align 8
+  store i32 %5, i32* %6, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %for.body
+  %7 = load i32*, i32** %secret.addr, align 8
+  %8 = load i32, i32* %ret_val, align 4
+  %idxprom2 = sext i32 %8 to i64
+  %arrayidx3 = getelementptr inbounds i32, i32* %7, i64 %idxprom2
+  store i32 42, i32* %arrayidx3, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %9 = load i32*, i32** %untrusted_user_ptr.addr, align 8
+  %10 = load i32, i32* %9, align 4
+  store i32 %10, i32* %ret_val, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %11 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %11, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %12 = load i32, i32* %ret_val, align 4
+  ret i32 %12
+}
+
+; CHECK: digraph "Speculative gadgets for \"test\" function" {
+; CHECK-NEXT: label="Speculative gadgets for \"test\" function";
+; CHECK: Node0x{{[0-9a-f]+}} [shape=record,color = green,label="{LFENCE\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.i)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JCC_1 %bb.6, 13, implicit killed $eflags\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{CMP32rm killed renamable $eax, %stack.2.secret_size.addr, 1, $noreg, 0, $noreg, implicit-def $eflags :: (dereferenceable load 4 from %ir.secret_size.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.4.i, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.i)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JCC_1 %bb.4, 5, implicit killed $eflags\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.secret.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 4, killed renamable $rcx, 0, $noreg :: (load 4 from %ir.arrayidx)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.untrusted_user_ptr.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mr killed renamable $rcx, 1, $noreg, 0, $noreg, killed renamable $eax :: (store 4 into %ir.6)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.1.secret.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.secret.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV32mi killed renamable $rax, 4, killed renamable $rcx, 0, $noreg, 42 :: (store 4 into %ir.arrayidx3)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rcx = MOVSX64rm32 %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $rax = MOV64rm %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg :: (dereferenceable load 8 from %ir.untrusted_user_ptr.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[color = red, style = "dashed"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm killed renamable $rax, 1, $noreg, 0, $noreg :: (load 4 from %ir.9)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,color = blue,label="{ARGS}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{MOV64mr %stack.0.untrusted_user_ptr.addr, 1, $noreg, 0, $noreg, killed renamable $rdi :: (store 8 into %ir.untrusted_user_ptr.addr)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JMP_1 %bb.5\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{JMP_1 %bb.1\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 1];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{renamable $eax = MOV32rm %stack.3.ret_val, 1, $noreg, 0, $noreg :: (dereferenceable load 4 from %ir.ret_val)\n}"];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} -> Node0x{{[0-9a-f]+}}[label = 0];
+; CHECK-NEXT: Node0x{{[0-9a-f]+}} [shape=record,label="{RET 0, $eax\n}"];
+; CHECK-NEXT: }
+
+; Function Attrs: nounwind
+declare void @llvm.x86.sse2.lfence() #1
+
+attributes #0 = { "target-features"="+lvi-cfi,+lvi-load-hardening" }
+attributes #1 = { nounwind }
Index: llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-indirectbr.ll
@@ -0,0 +1,282 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --check-prefix=X64FAST
+;
+; Note that a lot of this code was lifted from retpoline.ll.
+
+declare void @bar(i32)
+
+; Test a simple indirect call and tail call.
+define void @icall_reg(void (i32)* %fp, i32 %x) #0 {
+entry:
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  tail call void @bar(i32 %x)
+  tail call void %fp(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_reg:
+; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
+; X64-DAG: movl %esi, %[[x:[^ ]*]]
+; X64: movl %esi, %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64: movl %[[x]], %edi
+; X64: callq bar
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_reg:
+; X64FAST: callq bar
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: callq bar
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+@global_fp = external global void (i32)*
+
+; Test an indirect call through a global variable.
+define void @icall_global_fp(i32 %x, void (i32)** %fpp) #0 {
+  %fp1 = load void (i32)*, void (i32)** @global_fp
+  call void %fp1(i32 %x)
+  %fp2 = load void (i32)*, void (i32)** @global_fp
+  tail call void %fp2(i32 %x)
+  ret void
+}
+
+; X64-LABEL: icall_global_fp:
+; X64-DAG: movl %edi, %[[x:[^ ]*]]
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64-DAG: movl %[[x]], %edi
+; X64-DAG: movq global_fp(%rip), %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: icall_global_fp:
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: movq global_fp(%rip), %r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+%struct.Foo = type { void (%struct.Foo*)** }
+
+; Test an indirect call through a vtable.
+define void @vcall(%struct.Foo* %obj) #0 {
+  %vptr_field = getelementptr %struct.Foo, %struct.Foo* %obj, i32 0, i32 0
+  %vptr = load void (%struct.Foo*)**, void (%struct.Foo*)*** %vptr_field
+  %vslot = getelementptr void(%struct.Foo*)*, void(%struct.Foo*)** %vptr, i32 1
+  %fp = load void(%struct.Foo*)*, void(%struct.Foo*)** %vslot
+  tail call void %fp(%struct.Foo* %obj)
+  tail call void %fp(%struct.Foo* %obj)
+  ret void
+}
+
+; X64-LABEL: vcall:
+; X64: movq %rdi, %[[obj:[^ ]*]]
+; X64: movq (%rdi), %[[vptr:[^ ]*]]
+; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
+; X64: movq %[[fp]], %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64-DAG: movq %[[obj]], %rdi
+; X64-DAG: movq %[[fp]], %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+; X64FAST-LABEL: vcall:
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+declare void @direct_callee()
+
+define void @direct_tail() #0 {
+  tail call void @direct_callee()
+  ret void
+}
+
+; X64-LABEL: direct_tail:
+; X64: jmp direct_callee # TAILCALL
+; X64FAST-LABEL: direct_tail:
+; X64FAST: jmp direct_callee # TAILCALL
+
+
+declare void @nonlazybind_callee() #1
+
+define void @nonlazybind_caller() #0 {
+  call void @nonlazybind_callee()
+  tail call void @nonlazybind_callee()
+  ret void
+}
+
+; X64-LABEL: nonlazybind_caller:
+; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
+; X64: movq %[[REG]], %r11
+; X64: callq __x86_indirect_thunk_r11
+; X64: movq %[[REG]], %r11
+; X64: jmp __x86_indirect_thunk_r11 # TAILCALL
+; X64FAST-LABEL: nonlazybind_caller:
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: callq __x86_indirect_thunk_r11
+; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
+; X64FAST: jmp __x86_indirect_thunk_r11 # TAILCALL
+
+
+; Check that a switch gets lowered using a jump table
+define void @switch_jumptable(i32* %ptr, i64* %sink) #0 {
+; X64-LABEL: switch_jumptable:
+; X64-NOT: jmpq *
+entry:
+  br label %header
+
+header:
+  %i = load volatile i32, i32* %ptr
+  switch i32 %i, label %bb0 [
+    i32 1, label %bb1
+    i32 2, label %bb2
+    i32 3, label %bb3
+    i32 4, label %bb4
+    i32 5, label %bb5
+    i32 6, label %bb6
+    i32 7, label %bb7
+    i32 8, label %bb8
+    i32 9, label %bb9
+  ]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %header
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %header
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %header
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %header
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %header
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %header
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %header
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %header
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %header
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %header
+}
+
+
+@indirectbr_rewrite.targets = constant [10 x i8*] [i8* blockaddress(@indirectbr_rewrite, %bb0),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb1),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb2),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb3),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb4),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb5),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb6),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb7),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb8),
+                                                   i8* blockaddress(@indirectbr_rewrite, %bb9)]
+
+; Check that when thunks are enabled the indirectbr instruction gets
+; rewritten to use switch, and that in turn doesn't get lowered as a jump
+; table.
+define void @indirectbr_rewrite(i64* readonly %p, i64* %sink) #0 {
+; X64-LABEL: indirectbr_rewrite:
+; X64-NOT: jmpq *
+entry:
+  %i0 = load i64, i64* %p
+  %target.i0 = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i0
+  %target0 = load i8*, i8** %target.i0
+  indirectbr i8* %target0, [label %bb1, label %bb3]
+
+bb0:
+  store volatile i64 0, i64* %sink
+  br label %latch
+
+bb1:
+  store volatile i64 1, i64* %sink
+  br label %latch
+
+bb2:
+  store volatile i64 2, i64* %sink
+  br label %latch
+
+bb3:
+  store volatile i64 3, i64* %sink
+  br label %latch
+
+bb4:
+  store volatile i64 4, i64* %sink
+  br label %latch
+
+bb5:
+  store volatile i64 5, i64* %sink
+  br label %latch
+
+bb6:
+  store volatile i64 6, i64* %sink
+  br label %latch
+
+bb7:
+  store volatile i64 7, i64* %sink
+  br label %latch
+
+bb8:
+  store volatile i64 8, i64* %sink
+  br label %latch
+
+bb9:
+  store volatile i64 9, i64* %sink
+  br label %latch
+
+latch:
+  %i.next = load i64, i64* %p
+  %target.i.next = getelementptr [10 x i8*], [10 x i8*]* @indirectbr_rewrite.targets, i64 0, i64 %i.next
+  %target.next = load i8*, i8** %target.i.next
+  ; Potentially hit a full 10 successors here so that even if we rewrite as
+  ; a switch it will try to be lowered with a jump table.
+  indirectbr i8* %target.next, [label %bb0,
+                                label %bb1,
+                                label %bb2,
+                                label %bb3,
+                                label %bb4,
+                                label %bb5,
+                                label %bb6,
+                                label %bb7,
+                                label %bb8,
+                                label %bb9]
+}
+
+; Lastly check that the necessary thunks were emitted.
+;
+; X64-LABEL: .section .text.__x86_indirect_thunk_r11,{{.*}},__x86_indirect_thunk_r11,comdat
+; X64-NEXT: .hidden __x86_indirect_thunk_r11
+; X64-NEXT: .weak __x86_indirect_thunk_r11
+; X64: __x86_indirect_thunk_r11:
+; X64-NEXT: # {{.*}} # %entry
+; X64-NEXT: lfence
+; X64-NEXT: jmpq *%r11
+
+attributes #0 = { "target-features"="+lvi-cfi" }
+attributes #1 = { nonlazybind }
Index: llvm/test/CodeGen/X86/lvi-hardening-inline-asm.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-inline-asm.ll
@@ -0,0 +1,135 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -mattr=+lvi-load-hardening -mattr=+lvi-cfi < %s -o %t.out 2> %t.err
+; RUN: FileCheck %s --check-prefix=X86 < %t.out
+; RUN: FileCheck %s --check-prefix=WARN < %t.err
+
+; Test module-level assembly
+module asm "pop %rbx"
+module asm "ret"
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: ret
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @test_inline_asm() {
+entry:
+; X86-LABEL: test_inline_asm:
+  call void asm sideeffect "mov 0x3fed(%rip),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movq 16365(%rip), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "movdqa 0x0(%rip),%xmm0", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movdqa (%rip), %xmm0
+; X86-NEXT: lfence
+  call void asm sideeffect "movslq 0x3e5d(%rip),%rbx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movslq 15965(%rip), %rbx
+; X86-NEXT: lfence
+  call void asm sideeffect "mov (%r12,%rax,8),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movq (%r12,%rax,8), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "movq (24)(%rsi), %r11", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: movq 24(%rsi), %r11
+; X86-NEXT: lfence
+  call void asm sideeffect "cmove %r12,%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: cmoveq %r12, %rax
+; X86-NOT: lfence
+  call void asm sideeffect "cmove (%r12),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: cmoveq (%r12), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "pop %rbx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: popq %rbx
+; X86-NEXT: lfence
+  call void asm sideeffect "popq %rbx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: popq %rbx
+; X86-NEXT: lfence
+  call void asm sideeffect "xchg (%r12),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: xchgq %rax, (%r12)
+; X86-NEXT: lfence
+  call void asm sideeffect "cmpxchg %r12,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: cmpxchgq %r12, (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "vpxor (%rcx,%rdx,1),%ymm1,%ymm0", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: vpxor (%rcx,%rdx), %ymm1, %ymm0
+; X86-NEXT: lfence
+  call void asm sideeffect "vpmuludq 0x20(%rsi),%ymm0,%ymm12", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: vpmuludq 32(%rsi), %ymm0, %ymm12
+; X86-NEXT: lfence
+  call void asm sideeffect "vpexpandq 0x40(%rdi),%zmm8{%k2}{z}", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: vpexpandq 64(%rdi), %zmm8 {%k2} {z}
+; X86-NEXT: lfence
+  call void asm sideeffect "addq (%r12),%rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: addq (%r12), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "subq Lpoly+0(%rip), %rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: subq Lpoly+0(%rip), %rax
+; X86-NEXT: lfence
+  call void asm sideeffect "adcq %r12,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: adcq %r12, (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "negq (%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: negq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "incq %rax", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: incq %rax
+; X86-NOT: lfence
+  call void asm sideeffect "mulq (%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: mulq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "imulq (%rax),%rdx", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: imulq (%rax), %rdx
+; X86-NEXT: lfence
+  call void asm sideeffect "shlq $$1,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: shlq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "shrq $$1,(%rax)", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: shrq (%rax)
+; X86-NEXT: lfence
+  call void asm sideeffect "repz cmpsb %es:(%rdi),%ds:(%rsi)", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: repz cmpsb %es:(%rdi),%ds:(%rsi)
+; X86: rep cmpsb %es:(%rdi), %ds:(%rsi)
+; X86-NOT: lfence
+  call void asm sideeffect "repnz scasb", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: repnz scasb
+; X86: repne scasb %es:(%rdi), %al
+; X86-NOT: lfence
+  call void asm sideeffect "pinsrw $$0x6,(%eax),%xmm0", "~{dirflag},~{fpsr},~{flags}"() #1
+; X86: pinsrw $6, (%eax), %xmm0
+; X86-NEXT: lfence
+  call void asm sideeffect "ret", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: ret
+; X86: retq
+; X86-NOT: lfence
+  call void asm sideeffect "ret $$8", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: ret $8
+; X86: retq $8
+; X86-NOT: lfence
+  call void asm sideeffect "jmpq *(%rdx)", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: jmpq *(%rdx)
+; X86: jmpq *(%rdx)
+; X86-NOT: lfence
+  call void asm sideeffect "jmpq *0x100(%rdx)", "~{dirflag},~{fpsr},~{flags}"() #1
+; WARN: warning: Instruction may be vulnerable to LVI
+; WARN-NEXT: jmpq *0x100(%rdx)
+; X86: jmpq *256(%rdx)
+; X86-NOT: lfence
"~{dirflag},~{fpsr},~{flags}"() #1 +; WARN: warning: Instruction may be vulnerable to LVI +; WARN-NEXT: callq *200(%rdx) +; X86: callq *200(%rdx) +; X86-NOT: lfence + call void asm sideeffect "fldt 0x8(%rbp)", "~{dirflag},~{fpsr},~{flags}"() #1 +; X86: fldt 8(%rbp) +; X86-NEXT: lfence + call void asm sideeffect "fld %st(0)", "~{dirflag},~{fpsr},~{flags}"() #1 +; X86: fld %st(0) +; X86-NOT: lfence +; Test assembler macros + call void asm sideeffect ".macro mplus1 x\0Aincq (\5Cx)\0A.endm\0Amplus1 %rcx", "~{dirflag},~{fpsr},~{flags}"() #1 +; X86: incq (%rcx) +; X86-NEXT: lfence + ret void +} + +attributes #1 = { nounwind } Index: llvm/test/CodeGen/X86/lvi-hardening-loads.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/lvi-hardening-loads.ll @@ -0,0 +1,102 @@ +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CBFX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-CB +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-FX +; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown --x86-lvi-load-no-fixed --x86-lvi-load-no-cbranch < %s | FileCheck %s --check-prefix=X64 --check-prefix=X64-BASE + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local i32 @test(i32** %secret, i32 %secret_size) #0 { +; X64-LABEL: test: +entry: + %secret.addr = alloca i32**, align 8 + %secret_size.addr = alloca i32, align 4 + %ret_val = alloca i32, align 4 + %i = alloca i32, align 4 + store i32** %secret, i32*** %secret.addr, align 8 + store i32 %secret_size, i32* %secret_size.addr, align 4 + store i32 0, i32* %ret_val, align 4 + call void @llvm.x86.sse2.lfence() + store i32 0, i32* %i, align 4 + br label %for.cond + +; X64: # %bb.0: # %entry +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: lfence +; X64-NEXT: movl $0, -{{[0-9]+}}(%rsp) +; X64-NEXT: jmp .LBB0_1 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %secret_size.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +; X64: .LBB0_1: # %for.cond +; X64-NEXT: # =>This Inner Loop Header: Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: cmpl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: jge .LBB0_5 + +for.body: ; preds = %for.cond + %2 = load i32, i32* %i, align 4 + %rem = srem i32 %2, 2 + %cmp1 = icmp eq i32 %rem, 0 + br i1 %cmp1, label %if.then, label %if.end + +; X64: # %bb.2: # %for.body +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-CBFX-NEXT: lfence +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shrl $31, %ecx +; X64-NEXT: addl %eax, %ecx +; X64-NEXT: andl $-2, %ecx +; X64-NEXT: cmpl %ecx, %eax +; X64-NEXT: jne .LBB0_4 + +if.then: ; preds = %for.body + %3 = load i32**, i32*** %secret.addr, align 8 + %4 = load i32, i32* %ret_val, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds i32*, i32** %3, i64 %idxprom + %5 = load i32*, i32** %arrayidx, align 8 + %6 = load i32, i32* %5, align 4 + store i32 %6, i32* %ret_val, align 4 + br label %if.end + +; X64: # %bb.3: # %if.then +; X64-NEXT: # in Loop: Header=BB0_1 Depth=1 +; X64-NEXT: movq -{{[0-9]+}}(%rsp), 
+; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; X64-CBFX-NEXT: lfence
+; X64-FX-NEXT: lfence
+; X64-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx
+; X64-CBFX-NEXT: lfence
+; X64-FX-NEXT: lfence
+; X64-NEXT: movq (%rax,%rcx,8), %rax
+; X64-NEXT: lfence
+; X64-NEXT: movl (%rax), %eax
+; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
+; X64-NEXT: jmp .LBB0_4
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %7 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %7, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %8 = load i32, i32* %ret_val, align 4
+  ret i32 %8
+}
+
+; Function Attrs: nounwind
+declare void @llvm.x86.sse2.lfence() #1
+
+attributes #0 = { "target-features"="+lvi-load-hardening" }
+attributes #1 = { nounwind }
Index: llvm/test/CodeGen/X86/lvi-hardening-ret.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/lvi-hardening-ret.ll
@@ -0,0 +1,72 @@
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s
+
+define dso_local void @one_instruction() #0 {
+; CHECK-LABEL: one_instruction:
+entry:
+  ret void
+; CHECK-NOT: retq
+; CHECK: popq %[[x:[^ ]*]]
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%[[x]]
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @ordinary_function(i32 %x, i32 %y) #0 {
+; CHECK-LABEL: ordinary_function:
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+; CHECK-NOT: retq
+; CHECK: popq %[[x:[^ ]*]]
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%[[x]]
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @no_caller_saved_registers_function(i32 %x, i32 %y) #1 {
+; CHECK-LABEL: no_caller_saved_registers_function:
+entry:
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  %0 = load i32, i32* %x.addr, align 4
+  %1 = load i32, i32* %y.addr, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+; CHECK-NOT: retq
+; CHECK: shlq $0, (%{{[^ ]*}})
+; CHECK-NEXT: lfence
+; CHECK-NEXT: retq
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local preserve_mostcc void @preserve_most() #0 {
+; CHECK-LABEL: preserve_most:
+entry:
+  ret void
+; CHECK-NOT: retq
+; CHECK: popq %r11
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%r11
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local preserve_allcc void @preserve_all() #0 {
+; CHECK-LABEL: preserve_all:
+entry:
+  ret void
+; CHECK-NOT: retq
+; CHECK: popq %r11
+; CHECK-NEXT: lfence
+; CHECK-NEXT: jmpq *%r11
+}
+
+attributes #0 = { "target-features"="+lvi-cfi" }
+attributes #1 = { "no_caller_saved_registers" "target-features"="+lvi-cfi" }
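+
+; Note: as in the other LVI tests above, the mitigations are gated on the
+; per-function "target-features" attribute: only functions carrying +lvi-cfi
+; (control-flow and ret hardening) or +lvi-load-hardening (load hardening)
+; are rewritten; functions without these features are left untouched.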