diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -559,14 +559,26 @@ } } +#define RET_CHAIN_ORIGIN(id) \ + GET_CALLER_PC_BP_SP; \ + (void)sp; \ + GET_STORE_STACK_TRACE_PC_BP(pc, bp); \ + return ChainOrigin(id, &stack); + // Return a new origin chain with the previous ID id and the current stack // trace. extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin __dfsan_chain_origin(dfsan_origin id) { - GET_CALLER_PC_BP_SP; - (void)sp; - GET_STORE_STACK_TRACE_PC_BP(pc, bp); - return ChainOrigin(id, &stack); + RET_CHAIN_ORIGIN(id) +} + +// Return a new origin chain with the previous ID id and the current stack +// trace if the label is tainted. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_origin +__dfsan_chain_origin_if_tainted(dfsan_label label, dfsan_origin id) { + if (!label) + return id; + RET_CHAIN_ORIGIN(id) } // Copy or move the origins of the len bytes from src to dst. diff --git a/compiler-rt/test/dfsan/origin_track_ld.c b/compiler-rt/test/dfsan/origin_track_ld.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_track_ld.c @@ -0,0 +1,31 @@ +// RUN: %clang_dfsan -gmlt -mllvm -dfsan-track-origins=2 -mllvm -dfsan-fast-16-labels=true %s -o %t && \ +// RUN: %run %t > %t.out 2>&1 +// RUN: FileCheck %s < %t.out +// +// REQUIRES: x86_64-target-arch + +#include <sanitizer/dfsan_interface.h> + +__attribute__((noinline)) uint64_t foo(uint64_t a, uint64_t b) { return a + b; } + +int main(int argc, char *argv[]) { + uint64_t a = 10; + uint64_t b = 20; + dfsan_set_label(8, &a, sizeof(a)); + uint64_t c = foo(a, b); + dfsan_print_origin_trace(&c, NULL); +} + +// CHECK: Taint value 0x8 {{.*}} origin tracking () +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-6]] + +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in dfs$foo 
{{.*}}origin_track_ld.c:[[@LINE-15]] +// CHECK: #1 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-10]] + +// CHECK: Origin value: {{.*}}, Taint value was stored to memory at +// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-13]] + +// CHECK: Origin value: {{.*}}, Taint value was created at +// CHECK: #0 {{.*}} in main {{.*}}origin_track_ld.c:[[@LINE-17]] diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -256,7 +256,8 @@ // Controls how to track origins. // * 0: do not track origins. // * 1: track origins at memory store operations. -// * 2: TODO: track origins at memory store operations and callsites. +// * 2: track origins at memory load and store operations. +// TODO: track callsites. static cl::opt<int> ClTrackOrigins("dfsan-track-origins", cl::desc("Track origins of labels"), cl::Hidden, cl::init(0)); @@ -453,6 +454,7 @@ FunctionType *DFSanLoadStoreCallbackFnTy; FunctionType *DFSanMemTransferCallbackFnTy; FunctionType *DFSanChainOriginFnTy; + FunctionType *DFSanChainOriginIfTaintedFnTy; FunctionType *DFSanMemOriginTransferFnTy; FunctionType *DFSanMaybeStoreOriginFnTy; FunctionCallee DFSanUnionFn; @@ -469,6 +471,7 @@ FunctionCallee DFSanMemTransferCallbackFn; FunctionCallee DFSanCmpCallbackFn; FunctionCallee DFSanChainOriginFn; + FunctionCallee DFSanChainOriginIfTaintedFn; FunctionCallee DFSanMemOriginTransferFn; FunctionCallee DFSanMaybeStoreOriginFn; SmallPtrSet<Value *, 16> DFSanRuntimeFunctions; @@ -637,9 +640,18 @@ Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2, Instruction *Pos); Value *combineOperandShadows(Instruction *Inst); - std::pair<Value *, Value *> loadShadowOrigin(Value *ShadowAddr, uint64_t Size, + + /// Generates IR to load shadow and origin corresponding to bytes [\p + /// Addr, \p Addr + \p Size), where addr has alignment \p + /// 
InstAlignment, and take the union of each of those shadows. The returned + /// shadow always has primitive type. + /// + /// When tracking loads is enabled, the returned origin is a chain at the + /// current stack if the returned shadow is tainted. + std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos); + void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size, Align InstAlignment, Value *PrimitiveShadow, Value *Origin, Instruction *Pos); @@ -695,11 +707,18 @@ /// additional call with many instructions. To ensure common cases are fast, /// checks if it is possible to load labels and origins without using the /// callback function. + /// + /// When enabling tracking load instructions, we always use + /// __dfsan_load_label_and_origin to reduce code size. bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment); /// Returns a chain at the current stack with previous origin V. Value *updateOrigin(Value *V, IRBuilder<> &IRB); + /// Returns a chain at the current stack with previous origin V if Shadow is + /// tainted. + Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB); + /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns /// Origin otherwise. Value *originToIntptr(IRBuilder<> &IRB, Value *Origin); @@ -722,6 +741,13 @@ bool shouldInstrumentWithCall(); + /// Generates IR to load shadow and origin corresponding to bytes [\p + /// Addr, \p Addr + \p Size), where addr has alignment \p + /// InstAlignment, and take the union of each of those shadows. The returned + /// shadow always has primitive type. 
+ std::pair<Value *, Value *> + loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size, + Align InstAlignment, Instruction *Pos); int NumOriginStores = 0; }; @@ -1110,6 +1136,9 @@ /*isVarArg=*/false); DFSanChainOriginFnTy = FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false); + Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy}; + DFSanChainOriginIfTaintedFnTy = FunctionType::get( + OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false); Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits), Int8Ptr, IntptrTy, OriginTy}; DFSanMaybeStoreOriginFnTy = FunctionType::get( @@ -1343,6 +1372,15 @@ DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin", DFSanChainOriginFnTy, AL); } + { + AttributeList AL; + AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt); + AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt); + AL = AL.addAttribute(M.getContext(), AttributeList::ReturnIndex, + Attribute::ZExt); + DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction( + "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL); + } DFSanMemOriginTransferFn = Mod->getOrInsertFunction( "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy); @@ -1381,6 +1419,8 @@ DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanChainOriginFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( + DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanMemOriginTransferFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( @@ -2033,6 +2073,11 @@ bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment) { + // When enabling tracking load instructions, we always use + // __dfsan_load_label_and_origin to reduce code size. + if (ClTrackOrigins == 2) + return true; + assert(Size != 0); // * if Size == 1, it is sufficient to load its origin aligned at 4. 
// * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to @@ -2198,13 +2243,8 @@ return Shadow; } -// Generates IR to load shadow corresponding to bytes [Addr, Addr+Size), where -// Addr has alignment Align, and take the union of each of those shadows. The -// returned shadow always has primitive type. -std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr, - uint64_t Size, - Align InstAlignment, - Instruction *Pos) { +std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking( + Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) { const bool ShouldTrackOrigins = DFS.shouldTrackOrigins(); // Non-escaped loads. @@ -2309,6 +2349,24 @@ return {FallbackCall, Origin}; } +std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr, + uint64_t Size, + Align InstAlignment, + Instruction *Pos) { + Value *PrimitiveShadow, *Origin; + std::tie(PrimitiveShadow, Origin) = + loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos); + if (DFS.shouldTrackOrigins()) { + if (ClTrackOrigins == 2) { + IRBuilder<> IRB(Pos); + auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow); + if (!ConstantShadow || !ConstantShadow->isZeroValue()) + Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB); + } + } + return {PrimitiveShadow, Origin}; +} + static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) { switch (AO) { case AtomicOrdering::NotAtomic: @@ -2380,6 +2438,12 @@ } } +Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin, + IRBuilder<> &IRB) { + assert(DFS.shouldTrackOrigins()); + return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin}); +} + Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) { if (!DFS.shouldTrackOrigins()) return V; diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll --- a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll +++ b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll @@ -55,5 +55,6 @@ ; CHECK: 
declare void @__dfsan_nonzero_label() ; CHECK: declare void @__dfsan_vararg_wrapper(i8*) ; CHECK: declare zeroext i32 @__dfsan_chain_origin(i32 zeroext) +; CHECK: declare zeroext i32 @__dfsan_chain_origin_if_tainted(i[[#SBITS]] zeroext, i32 zeroext) ; CHECK: declare void @__dfsan_mem_origin_transfer(i8*, i8*, i64) ; CHECK: declare void @__dfsan_maybe_store_origin(i[[#SBITS]] zeroext, i8*, i64, i32 zeroext) diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_track_load.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -dfsan -dfsan-track-origins=2 -dfsan-fast-8-labels -S | FileCheck %s +; RUN: opt < %s -dfsan -dfsan-track-origins=2 -dfsan-fast-16-labels -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]] +; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]] + +define i64 @load64(i64* %p) { + ; CHECK-LABEL: @"dfs$load64" + + ; CHECK-NEXT: %[[#PO:]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4 + ; CHECK-NEXT: %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN:2]] + + ; CHECK-NEXT: %[[#INTP:]] = bitcast i64* %p to i8* + ; CHECK-NEXT: %[[#LABEL_ORIGIN:]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* %[[#INTP]], i64 8) + ; CHECK-NEXT: %[[#LABEL_ORIGIN_H32:]] = lshr i64 %[[#LABEL_ORIGIN]], 32 + ; CHECK-NEXT: %[[#LABEL:]] = trunc i64 %[[#LABEL_ORIGIN_H32]] to i[[#SBITS]] + ; CHECK-NEXT: %[[#ORIGIN:]] = trunc i64 %[[#LABEL_ORIGIN]] to i32 + ; CHECK-NEXT: %[[#ORIGIN_CHAINED:]] = call i32 
@__dfsan_chain_origin_if_tainted(i[[#SBITS]] %[[#LABEL]], i32 %[[#ORIGIN]]) + + ; CHECK-NEXT: %[[#LABEL:]] = or i[[#SBITS]] %[[#LABEL]], %[[#PS]] + ; CHECK-NEXT: %[[#NZ:]] = icmp ne i[[#SBITS]] %[[#PS]], 0 + ; CHECK-NEXT: %[[#ORIGIN_SEL:]] = select i1 %[[#NZ]], i32 %[[#PO]], i32 %[[#ORIGIN_CHAINED]] + + ; CHECK-NEXT: %a = load i64, i64* %p + ; CHECK-NEXT: store i[[#SBITS]] %[[#LABEL]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]] + ; CHECK-NEXT: store i32 %[[#ORIGIN_SEL]], i32* @__dfsan_retval_origin_tls, align 4 + + %a = load i64, i64* %p + ret i64 %a +}