Index: lib/Target/X86/CMakeLists.txt
===================================================================
--- lib/Target/X86/CMakeLists.txt
+++ lib/Target/X86/CMakeLists.txt
@@ -46,6 +46,7 @@
   X86OptimizeLEAs.cpp
   X86PadShortFunction.cpp
   X86RegisterInfo.cpp
+  X86SafeStackBoundsChecking.cpp
   X86SelectionDAGInfo.cpp
   X86ShuffleDecodeConstantPool.cpp
   X86Subtarget.cpp
Index: lib/Target/X86/X86.h
===================================================================
--- lib/Target/X86/X86.h
+++ lib/Target/X86/X86.h
@@ -94,6 +94,12 @@
 void initializeEvexToVexInstPassPass(PassRegistry &);

+/// This pass adds bounds checks to prevent stray writes from corrupting the
+/// safe stack.
+FunctionPass *createX86SafeStackBoundsCheckingPass(X86TargetMachine &TM);
+
+void initializeX86SafeStackBoundsCheckingPass(PassRegistry &);
+
 } // End llvm namespace

 #endif
Index: lib/Target/X86/X86SafeStackBoundsChecking.cpp
===================================================================
--- /dev/null
+++ lib/Target/X86/X86SafeStackBoundsChecking.cpp
@@ -0,0 +1,365 @@
//===-- X86SafeStackBoundsChecking.cpp - MPX-based SafeStack Hardening ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass identifies stores that are not authorized to access the safe
// stack and, where necessary, inserts Intel MPX bounds-checking instructions
// ahead of them so that stray pointers cannot be used by those stores to
// corrupt safe stacks. Runtime support is required for initializing MPX,
// allocating safe stacks at high addresses, etc.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include <forward_list>
#include <set>

using namespace llvm;

#define DEBUG_TYPE "x86-safestack-bounds-checking"
#define DEBUG_LABEL "[X86 SafeStack Bounds] "

namespace llvm {

STATISTIC(NumChecks, "Total number of emitted bound checks");
STATISTIC(NumElidedChecks, "Number of elided bound checks");

} // namespace llvm

namespace {

class X86SafeStackBoundsChecking : public FunctionPass {
  X86TargetMachine *TM;
  const DataLayout *DL;
  const TargetLibraryInfo *TLI;

  /// Inline assembly for BNDCU instructions to check against BND0.
  InlineAsm *BoundCheckIAsm;

  ObjectSizeOffsetEvaluator *ObjSizeEval;

  /// Instructions that store to safe stack allocations.
  std::set<Instruction *> AllocaStores;

  /// Find all instructions that directly or indirectly store to the specified
  /// allocation and add them to AllocaStores.
  void findPerAllocaStores(const Value *AllocaPtr);

  /// Invoke findPerAllocaStores for each safe stack allocation.
  void findAllAllocaStores(Function &F);

  /// Insert a BNDCU instruction just ahead of InsertPt to check that the
  /// store to the specified pointer with the specified length will not modify
  /// any of the safe stacks.
  void insertBoundCheck(Value *Ptr, Value *Length, Instruction *InsertPt);

  /// Check whether the specified instruction needs to be instrumented with a
  /// bound check and insert one if necessary.
  bool processInstr(Instruction *Inst);

  bool processBasicBlock(BasicBlock &BB);

public:
  static char ID; // Pass identification, replacement for typeid.
  X86SafeStackBoundsChecking(X86TargetMachine *TM_)
      : FunctionPass(ID), TM(TM_), DL(nullptr), TLI(nullptr),
        ObjSizeEval(nullptr) {
    initializeX86SafeStackBoundsCheckingPass(*PassRegistry::getPassRegistry());
  }
  X86SafeStackBoundsChecking() : X86SafeStackBoundsChecking(nullptr) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addPreserved<StackProtector>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override;
}; // class X86SafeStackBoundsChecking

} // anonymous namespace

void X86SafeStackBoundsChecking::findPerAllocaStores(const Value *AllocaPtr) {
  SmallPtrSet<const Value *, 16> Visited;
  SmallVector<const Value *, 16> WorkList;
  WorkList.push_back(AllocaPtr);

  // A DFS search through all uses of the alloca in bitcasts/PHI/GEPs/etc.
  while (!WorkList.empty()) {
    const Value *V = WorkList.pop_back_val();
    for (const Use &UI : V->uses()) {
      auto I = cast<Instruction>(UI.getUser());

      assert(V == UI.get());

      if (isa<StoreInst>(*I) || isa<AtomicCmpXchgInst>(*I) ||
          isa<AtomicRMWInst>(*I)) {

        Value *StoredVal;
        if (isa<StoreInst>(*I)) {
          StoredVal = I->getOperand(0);
        } else if (isa<AtomicCmpXchgInst>(*I)) {
          StoredVal = I->getOperand(2);
        } else {
          assert(isa<AtomicRMWInst>(*I));
          StoredVal = I->getOperand(1);
        }

        assert(V != StoredVal &&
               "Unexpected store of safe stack allocation address.");

        AllocaStores.insert(I);
      } else if (auto MI = dyn_cast<MemIntrinsic>(I)) {
        if (MI->getRawDest() == V)
          AllocaStores.insert(MI);
      } else if (isa<BitCastInst>(*I) || isa<GetElementPtrInst>(*I) ||
                 isa<PHINode>(*I) || isa<SelectInst>(*I)) {

        if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
          assert(GEP->getPointerOperand() == V &&
                 "Safe stack allocation used as one of the GEP indices rather "
                 "than the pointer operand.");

        // This assumes that the instruction types in the condition are the
        // only ones used to compute or propagate the address of the
        // allocation.
        if (Visited.insert(I).second)
          WorkList.push_back(I);
      }
    }
  }
}

void X86SafeStackBoundsChecking::findAllAllocaStores(Function &F) {
  for (Instruction &I : instructions(&F))
    if (auto AI = dyn_cast<AllocaInst>(&I))
      findPerAllocaStores(AI);

  for (Argument &Arg : F.args())
    if (Arg.hasByValAttr())
      findPerAllocaStores(&Arg);
}

void X86SafeStackBoundsChecking::insertBoundCheck(Value *Ptr, Value *Length,
                                                  Instruction *InsertPt) {
  DEBUG(dbgs() << DEBUG_LABEL << "Insert bound check for ";
        Length->print(dbgs()); dbgs() << "-byte access to ";
        Ptr->print(dbgs()); dbgs() << " ahead of "; InsertPt->print(dbgs());
        dbgs() << "\n");

  // Check for accesses to thread-local (global) variables:
  // FIXME: Also handle select and phi instructions.
  GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr);
  if (GV == nullptr) {
    // Walk up through any chain of GEPs to find the base pointer.
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      while (GetElementPtrInst *Next =
                 dyn_cast<GetElementPtrInst>(GEP->getPointerOperand()))
        GEP = Next;
      GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
    }
  }
  if (GV != nullptr &&
      GV->getThreadLocalMode() != GlobalValue::NotThreadLocal) {
    // Linear addresses for thread-local accesses are computed relative to a
    // non-zero segment base address, so checking their effective addresses
    // would require a bounds register whose upper bound is adjusted down to
    // account for that base, rather than the bounds register used for all
    // other accesses. Furthermore, thread-local accesses sometimes use
    // negative offsets, which appear as very large unsigned effective
    // addresses; checking those would first require adding the base of the
    // thread-local storage segment. Instead, the current implementation does
    // not check accesses to thread-local storage.
    DEBUG(dbgs() << DEBUG_LABEL
                 << "Skip bound check for access to thread-local storage\n");
    return;
  }

  IRBuilder<> IRB(InsertPt);
  auto GenericPtr =
      IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, IRB.getInt8PtrTy());
  auto BoundGEP = IRB.CreateGEP(nullptr, GenericPtr, makeArrayRef(Length));
  IRB.CreateCall(BoundCheckIAsm, makeArrayRef(BoundGEP));

  ++NumChecks;
}

bool X86SafeStackBoundsChecking::processInstr(Instruction *Inst) {
  if (AllocaStores.count(Inst))
    // Do not instrument stores to the safe stack.
    return false;

  Value *Ptr = nullptr;
  Value *NeededSizeVal = nullptr;

  auto computeNeededSize = [&](Value *V) {
    uint64_t S = DL->getTypeStoreSize(V->getType());
    NeededSizeVal =
        ConstantInt::get(DL->getIntPtrType(Inst->getContext()), S, false);
  };

  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    Ptr = SI->getPointerOperand();
    computeNeededSize(SI->getValueOperand());
  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(Inst)) {
    Ptr = MI->getDest();
    NeededSizeVal = MI->getLength();
  } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(Inst)) {
    Ptr = AI->getPointerOperand();
    computeNeededSize(AI->getCompareOperand());
  } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) {
    Ptr = AI->getPointerOperand();
    computeNeededSize(AI->getValOperand());
  } else {
    llvm_unreachable("unknown Instruction type");
  }

  ConstantInt *NeededSizeConst = dyn_cast<ConstantInt>(NeededSizeVal);

  if (NeededSizeConst == nullptr) {
    insertBoundCheck(Ptr, NeededSizeVal, Inst);
    return true;
  }

  const APInt &NeededSize = NeededSizeConst->getValue();

  DEBUG(dbgs() << DEBUG_LABEL << "Instrument ";
        NeededSize.print(dbgs(), false); dbgs() << "-byte access to ";
        Ptr->print(dbgs()); dbgs() << " by "; Inst->print(dbgs());
        dbgs() << "\n");

  SizeOffsetEvalType SizeOffset = ObjSizeEval->compute(Ptr);

  if (ObjSizeEval->bothKnown(SizeOffset)) {
    DEBUG(dbgs() << DEBUG_LABEL << "Size and offset are known.\n");

    Value *Size = SizeOffset.first;
    Value *Offset = SizeOffset.second;
    ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
    ConstantInt *OffsetCI = dyn_cast<ConstantInt>(Offset);

    // Three checks are required to ensure safety:
    // . Offset >= 0 (since the offset is given from the base ptr)
    // . Size >= Offset (unsigned)
    // . Size - Offset >= NeededSize (unsigned)
    if (SizeCI && OffsetCI) {
      const APInt &SizeVal = SizeCI->getValue();
      const APInt &OffsetVal = OffsetCI->getValue();

      DEBUG(dbgs() << DEBUG_LABEL << "Size and offset are constant: ";
            SizeVal.print(dbgs(), false); dbgs() << ", ";
            OffsetVal.print(dbgs(), true); dbgs() << "\n");

      bool Overflow = false;
      if (OffsetVal.isNonNegative() &&
          SizeVal.usub_ov(OffsetVal, Overflow).uge(NeededSize) &&
          Overflow == false) {

        DEBUG(dbgs() << DEBUG_LABEL << "Elided unnecessary bound check.\n");

        ++NumElidedChecks;

        return false;
      }
    }
  }

  insertBoundCheck(Ptr, NeededSizeVal, Inst);
  return true;
}

bool X86SafeStackBoundsChecking::processBasicBlock(BasicBlock &BB) {
  // Check HANDLE_MEMORY_INST in include/llvm/IR/Instruction.def for
  // memory-touching instructions.
  std::forward_list<Instruction *> WorkList;
  for (Instruction &I : BB)
    if (isa<StoreInst>(I) || isa<MemIntrinsic>(I) ||
        isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
      WorkList.push_front(&I);

  bool MadeChange = false;

  for (Instruction *Inst : WorkList)
    MadeChange |= processInstr(Inst);

  return MadeChange;
}

bool X86SafeStackBoundsChecking::runOnFunction(Function &F) {
  DL = &(F.getParent()->getDataLayout());
  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  if (TM == nullptr)
    report_fatal_error(DEBUG_LABEL "Target machine must be provided");

  if (!F.hasFnAttribute(Attribute::SafeStack)) {
    DEBUG(dbgs() << DEBUG_LABEL << "safestack is not requested for this "
                 << "function\n");
    return false;
  }

  const X86Subtarget &STI = TM->getSubtarget<X86Subtarget>(F);

  if (!(STI.useSeparateStackSeg() && STI.is64Bit()))
    return false;

  assert(STI.hasMPX() &&
         "64-bit separate-stack-seg feature requires Intel MPX support");

  BoundCheckIAsm = InlineAsm::get(
      FunctionType::get(Type::getVoidTy(F.getContext()),
                        makeArrayRef(Type::getInt8PtrTy(F.getContext(), 0)),
                        false),
      "bndcu $0, %bnd0", "*m", false);

  ObjectSizeOffsetEvaluator TheObjSizeEval(*DL, TLI, F.getContext(),
                                           /*RoundToAlign=*/true);
  ObjSizeEval = &TheObjSizeEval;

  AllocaStores.clear();

  findAllAllocaStores(F);

  bool MadeChange = false;

  for (auto &BB : F)
    MadeChange |= processBasicBlock(BB);

  return MadeChange;
}

char X86SafeStackBoundsChecking::ID = 0;
INITIALIZE_PASS_BEGIN(X86SafeStackBoundsChecking,
                      "x86-safestack-bounds-checking",
                      "Safe Stack bounds checking pass", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(X86SafeStackBoundsChecking,
                    "x86-safestack-bounds-checking",
                    "Safe Stack bounds checking pass", false, false)

FunctionPass *
llvm::createX86SafeStackBoundsCheckingPass(X86TargetMachine &TM) {
  return new X86SafeStackBoundsChecking(&TM);
}
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -47,6 +47,7 @@
   initializeWinEHStatePassPass(PR);
   initializeFixupBWInstPassPass(PR);
   initializeEvexToVexInstPassPass(PR);
+  initializeX86SafeStackBoundsCheckingPass(PR);
 }

 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -324,6 +325,9 @@
 }

 bool X86PassConfig::addInstSelector() {
+  // Add SafeStack bounds checking pass.
+  addPass(createX86SafeStackBoundsCheckingPass(getX86TargetMachine()));
+
   // Install an instruction selector.
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); Index: test/CodeGen/X86/safestack-bounds-checking.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/safestack-bounds-checking.ll @@ -0,0 +1,110 @@ +; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+mpx,+separate-stack-seg -stop-after x86-safestack-bounds-checking -o - %s | FileCheck %s + +@__safestack_unsafe_stack_ptr = external thread_local(initialexec) global i8* + +; Function Attrs: nounwind safestack uwtable +define void @bad_store() #0 { +; CHECK-LABEL: @bad_store() +entry: + %unsafe_stack_ptr = load i8*, i8** @__safestack_unsafe_stack_ptr + %unsafe_stack_static_top = getelementptr i8, i8* %unsafe_stack_ptr, i32 -16 + store i8* %unsafe_stack_static_top, i8** @__safestack_unsafe_stack_ptr + %0 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -4 + %a.unsafe = bitcast i8* %0 to i32* + %1 = ptrtoint i32* %a.unsafe to i64 + %2 = inttoptr i64 %1 to i64* +; CHECK: %3 = bitcast i64* %2 to i8* +; CHECK: %4 = getelementptr i8, i8* %3, i64 8 +; CHECK: call void asm "bndcu $0, %bnd0", "*m"(i8* %4) + store i64 0, i64* %2 + store i8* %unsafe_stack_ptr, i8** @__safestack_unsafe_stack_ptr + ret void +} + +; Function Attrs: nounwind safestack uwtable +define void @good_store() #0 { +; CHECK-LABEL: @good_store() +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + store i8 0, i8* %0 + ret void +} + +; Function Attrs: nounwind safestack uwtable +define void @overflow_gep_store() #0 { +; CHECK-LABEL: @overflow_gep_store() +entry: + %unsafe_stack_ptr = load i8*, i8** @__safestack_unsafe_stack_ptr + %unsafe_stack_static_top = getelementptr i8, i8* %unsafe_stack_ptr, i32 -16 + store i8* %unsafe_stack_static_top, i8** @__safestack_unsafe_stack_ptr + %0 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -8 + %a.unsafe = bitcast i8* %0 to i32* + %1 = bitcast i32* %a.unsafe to i8* + %2 = getelementptr i8, i8* %1, i32 4 +; CHECK: %3 = getelementptr i8, i8* %2, i64 1 +; CHECK: call void asm "bndcu $0, %bnd0", "*m"(i8* %3) + store i8 0, i8* %2 + %3 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -4 + %b.unsafe = bitcast i8* %3 to i32* + %4 = bitcast i32* %b.unsafe to i8* + %5 = getelementptr i8, i8* %4, i32 4 +; CHECK: %7 = getelementptr i8, i8* %6, i64 1 +; CHECK: call void asm "bndcu $0, %bnd0", "*m"(i8* %7) + store i8 0, i8* %5 + store i8* %unsafe_stack_ptr, i8** @__safestack_unsafe_stack_ptr + ret void +} + +; Function Attrs: nounwind safestack uwtable +define void @underflow_gep_store() #0 { +; CHECK-LABEL: @underflow_gep_store() +entry: + %unsafe_stack_ptr = load i8*, i8** @__safestack_unsafe_stack_ptr + %unsafe_stack_static_top = getelementptr i8, i8* %unsafe_stack_ptr, i32 -16 + store i8* %unsafe_stack_static_top, i8** @__safestack_unsafe_stack_ptr + %0 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -4 + %a.unsafe = bitcast i8* %0 to i32* + %1 = bitcast i32* %a.unsafe to i8* + %2 = getelementptr i8, i8* %1, i32 -1 +; CHECK: %3 = getelementptr i8, i8* %2, i64 1 +; CHECK: call void asm "bndcu $0, %bnd0", "*m"(i8* %3) + store i8 0, i8* %2 + store i8* %unsafe_stack_ptr, i8** @__safestack_unsafe_stack_ptr + ret void +} + +; Function Attrs: nounwind safestack uwtable +define void @good_gep_store() #0 { +; CHECK-LABEL: @good_gep_store() +entry: + %a = alloca i32, align 4 + %0 = bitcast i32* %a to i8* + %1 = getelementptr i8, i8* %0, i32 3 + store i8 0, i8* %1 + ret void +} + +; Function Attrs: safestack +define void @call_memset(i64 %len) #1 { +; CHECK-LABEL: @call_memset(i64 %len) +entry: + 
%unsafe_stack_ptr = load i8*, i8** @__safestack_unsafe_stack_ptr + %unsafe_stack_static_top = getelementptr i8, i8* %unsafe_stack_ptr, i32 -16 + store i8* %unsafe_stack_static_top, i8** @__safestack_unsafe_stack_ptr + %0 = getelementptr i8, i8* %unsafe_stack_ptr, i32 -10 + %q.unsafe = bitcast i8* %0 to [10 x i8]* + %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %q.unsafe, i32 0, i32 0 +; CHECK: %1 = getelementptr i8, i8* %0, i64 %len +; CHECK: call void asm "bndcu $0, %bnd0", "*m"(i8* %1) + call void @llvm.memset.p0i8.i64(i8* %arraydecay, i8 1, i64 %len, i32 1, i1 false) + store i8* %unsafe_stack_ptr, i8** @__safestack_unsafe_stack_ptr + ret void +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #3 + +attributes #0 = { nounwind safestack uwtable } +attributes #1 = { safestack } +attributes #3 = { argmemonly nounwind "target-features"="+mpx,+separate-stack-seg" }
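
Note on runtime support (illustration only, not part of this patch): the pass only emits "bndcu ptr+len, %bnd0" checks; it relies on the runtime mentioned in the file header to enable MPX and to load BND0 with an upper bound just below the region where safe stacks are allocated, so that a checked store reaching into that region raises #BR. A minimal sketch of what such initialization could look like follows; the function name, its argument, the threshold policy, and the choice to leave the lower bound at zero are assumptions made here for illustration (BNDCU only consults the upper bound).

// Hypothetical runtime-side sketch, NOT part of this patch. Assumes MPX has
// already been enabled for the process and that the runtime knows the lowest
// address ever used for a safe stack. BNDMK loads the lower bound from the
// base register of its memory operand and the upper bound from the effective
// address, so a zero base with the threshold as index yields [0, Threshold].
#include <cstdint>

static void initSafeStackBound(uintptr_t LowestSafeStackAddr) {
  uintptr_t Threshold = LowestSafeStackAddr - 1; // hypothetical policy
  uintptr_t Zero = 0;
  // AT&T syntax, matching the "bndcu $0, %bnd0" inline asm emitted above.
  asm volatile("bndmk (%0,%1), %%bnd0" : : "r"(Zero), "r"(Threshold));
}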