diff --git a/compiler-rt/include/sanitizer/dfsan_interface.h b/compiler-rt/include/sanitizer/dfsan_interface.h --- a/compiler-rt/include/sanitizer/dfsan_interface.h +++ b/compiler-rt/include/sanitizer/dfsan_interface.h @@ -27,6 +27,10 @@ /// Signature of the callback argument to dfsan_set_write_callback(). typedef void (*dfsan_write_callback_t)(int fd, const void *buf, size_t count); +/// Signature of the callback argument to dfsan_set_conditional_callback(). +typedef void (*dfsan_conditional_callback_t)(dfsan_label label, + dfsan_origin origin); + /// Computes the union of \c l1 and \c l2, resulting in a union label. dfsan_label dfsan_union(dfsan_label l1, dfsan_label l2); @@ -74,6 +78,19 @@ /// callback executes. Pass in NULL to remove any callback. void dfsan_set_write_callback(dfsan_write_callback_t labeled_write_callback); +/// Sets a callback to be invoked on any conditional expressions which have a +/// taint label set. This can be used to find where tainted data influences +/// the behavior of the program. +/// These callbacks will only be added when -dfsan-conditional-callbacks=true. +void dfsan_set_conditional_callback(dfsan_conditional_callback_t callback); + +/// Conditional expressions can also be evaluated inside signal handlers. +/// Making callbacks that handle signals well is tricky, so when +/// -dfsan-conditional-callbacks=true, conditional expressions used in signal +/// handlers will add the labels they see into a global (bitwise-or together). +/// This function returns all label bits seen in signal handler conditions. +dfsan_label dfsan_get_labels_in_signal_conditional(void); + /// Interceptor hooks. /// Whenever a dfsan's custom function is called the corresponding /// hook is called it non-zero. The hooks should be defined by the user. 
diff --git a/compiler-rt/lib/dfsan/dfsan.cpp b/compiler-rt/lib/dfsan/dfsan.cpp --- a/compiler-rt/lib/dfsan/dfsan.cpp +++ b/compiler-rt/lib/dfsan/dfsan.cpp @@ -600,6 +600,60 @@ return (label & elem) == elem; } +namespace __dfsan { + +typedef void (*dfsan_conditional_callback_t)(dfsan_label label, + dfsan_origin origin); +static dfsan_conditional_callback_t conditional_callback = nullptr; +static dfsan_label labels_in_signal_conditional = 0; + +static void ConditionalCallback(dfsan_label label, dfsan_origin origin) { + // Programs have many branches. For efficiency the conditional sink callback + // handler needs to ignore as many as possible as early as possible. + if (label == 0) { + return; + } + if (conditional_callback == nullptr) { + return; + } + + // This initial ConditionalCallback handler needs to be in here in dfsan + // runtime (rather than being an entirely user implemented hook) so that it + // has access to dfsan thread information. + DFsanThread *t = GetCurrentThread(); + // A callback that does useful work (like recording the flow) will likely + // take too long to run safely inside a signal handler. + if (t && t->InSignalHandler()) { + // Record the labels seen instead. NOTE(review): |= is non-atomic; confirm. 
+ labels_in_signal_conditional |= label; + return; + } + + conditional_callback(label, origin); +} + +} // namespace __dfsan + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__dfsan_conditional_callback_origin(dfsan_label label, dfsan_origin origin) { + __dfsan::ConditionalCallback(label, origin); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __dfsan_conditional_callback( + dfsan_label label) { + __dfsan::ConditionalCallback(label, 0); +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void dfsan_set_conditional_callback( + __dfsan::dfsan_conditional_callback_t callback) { + __dfsan::conditional_callback = callback; +} + +extern "C" SANITIZER_INTERFACE_ATTRIBUTE dfsan_label +dfsan_get_labels_in_signal_conditional() { + return __dfsan::labels_in_signal_conditional; +} + class Decorator : public __sanitizer::SanitizerCommonDecorator { public: Decorator() : SanitizerCommonDecorator() {} @@ -898,6 +952,7 @@ Die(); } } + __dfsan::labels_in_signal_conditional = 0; } // TODO: CheckMemoryLayoutSanity is based on msan. 
diff --git a/compiler-rt/lib/dfsan/done_abilist.txt b/compiler-rt/lib/dfsan/done_abilist.txt --- a/compiler-rt/lib/dfsan/done_abilist.txt +++ b/compiler-rt/lib/dfsan/done_abilist.txt @@ -46,6 +46,10 @@ fun:dfsan_get_init_origin=discard fun:dfsan_get_track_origins=uninstrumented fun:dfsan_get_track_origins=discard +fun:dfsan_set_conditional_callback=uninstrumented +fun:dfsan_set_conditional_callback=discard +fun:dfsan_get_labels_in_signal_conditional=uninstrumented +fun:dfsan_get_labels_in_signal_conditional=discard ############################################################################### # glibc diff --git a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt --- a/compiler-rt/test/dfsan/Inputs/flags_abilist.txt +++ b/compiler-rt/test/dfsan/Inputs/flags_abilist.txt @@ -7,3 +7,9 @@ fun:dfsan_set_label=uninstrumented fun:dfsan_set_label=discard + +fun:my_dfsan_conditional_callback=uninstrumented +fun:my_dfsan_conditional_callback=discard + +fun:dfsan_set_conditional_callback=uninstrumented +fun:dfsan_set_conditional_callback=discard diff --git a/compiler-rt/test/dfsan/conditional_callbacks.c b/compiler-rt/test/dfsan/conditional_callbacks.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/conditional_callbacks.c @@ -0,0 +1,106 @@ +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t +// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +// Tests that callbacks are inserted for conditionals when +// -dfsan-conditional-callbacks is specified. + +#include <assert.h> +#include <sanitizer/dfsan_interface.h> +#include <stdio.h> +#include <string.h> + +#ifdef CALLBACKS +// Compile this code without DFSan to avoid recursive instrumentation. 
+ +extern dfsan_label LabelI; +extern dfsan_label LabelJ; +extern dfsan_label LabelIJ; +extern dfsan_label LabelArgv; +extern size_t LenArgv; + +void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) { + assert(Label != 0); + assert(Origin == 0); + + static int Count = 0; + switch (Count++) { + case 0: + assert(Label == LabelI); + break; + case 1: + assert(Label == LabelJ); + break; + case 2: + assert(Label == LabelIJ); + break; + default: + break; + } + + fprintf(stderr, "Label %u used as condition\n", Label); +} + +#else +// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the +// callbacks. + +dfsan_label LabelI; +dfsan_label LabelJ; +dfsan_label LabelIJ; +dfsan_label LabelArgv; + +size_t LenArgv; + +extern void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin); + +int main(int Argc, char *Argv[]) { + assert(Argc == 2); + + dfsan_set_conditional_callback(my_dfsan_conditional_callback); + + int result = 0; + // Make these not look like constants, otherwise the branch we're expecting + // may be optimized out. + int DataI = (Argv[0][0] != 0) ? 1 : 0; + int DataJ = (Argv[1][0] != 0) ? 2 : 0; + LabelI = 1; + dfsan_set_label(LabelI, &DataI, sizeof(DataI)); + LabelJ = 2; + dfsan_set_label(LabelJ, &DataJ, sizeof(DataJ)); + LabelIJ = dfsan_union(LabelI, LabelJ); + + assert(dfsan_get_label(DataI) == LabelI); + + // CHECK: Label 1 used as condition + if (DataI) { + result = 42; + } + + assert(dfsan_get_label(DataJ) == LabelJ); + + // CHECK: Label 2 used as condition + switch (DataJ) { + case 1: + result += 10000; + break; + case 2: + result += 4200; + break; + default: + break; + } + + int tainted_cond = ((DataI * DataJ) != 1); + assert(dfsan_get_label(tainted_cond) == LabelIJ); + + // CHECK: Label 3 used as condition + result = tainted_cond ? 
result + 420000 : 9; + + assert(result == 424242); + return 0; +} + +#endif // #ifdef CALLBACKS diff --git a/compiler-rt/test/dfsan/conditional_callbacks_sig.c b/compiler-rt/test/dfsan/conditional_callbacks_sig.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/conditional_callbacks_sig.c @@ -0,0 +1,97 @@ +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks %s %t-callbacks.o -o %t +// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +#include <assert.h> +#include <sanitizer/dfsan_interface.h> +#include <signal.h> +#include <stdio.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#ifdef CALLBACKS +// Compile this code without DFSan to avoid recursive instrumentation. + +void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) { + assert(Label != 0); + assert(Origin == 0); + + static int Count = 0; + switch (Count++) { + case 0: + assert(Label == 1); + break; + case 1: + assert(Label == 4); + break; + default: + break; + } + + fprintf(stderr, "Label %u used as condition\n", Label); +} + +#else +// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the +// callbacks. + +extern void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin); + +volatile int x = 0; +volatile int y = 1; +volatile int z = 0; + +void SignalHandler(int signo) { + assert(dfsan_get_label(x) == 0); + assert(dfsan_get_label(y) != 0); + assert(dfsan_get_label(z) != 0); + // Running the conditional callback from a signal handler is risky, + // because the code must be written with signal handler context in mind. + // Instead dfsan_get_labels_in_signal_conditional() will indicate labels + // used in conditions inside signal handlers. + // CHECK-NOT: Label 8 used as condition + if (z != 0) { + x = y; + } +} + +int main(int Argc, char *Argv[]) { + assert(Argc >= 1); + int unknown = (Argv[0][0] != 0) ? 
1 : 0; + dfsan_set_label(1, &unknown, sizeof(unknown)); + + dfsan_set_conditional_callback(my_dfsan_conditional_callback); + + // CHECK: Label 1 used as condition + if (unknown) { + z = 42; + } + + assert(dfsan_get_labels_in_signal_conditional() == 0); + dfsan_set_label(4, (void *)&y, sizeof(y)); + dfsan_set_label(8, (void *)&z, sizeof(z)); + + struct sigaction sa = {}; + sa.sa_handler = SignalHandler; + int r = sigaction(SIGHUP, &sa, NULL); + assert(dfsan_get_label(r) == 0); + + kill(getpid(), SIGHUP); + signal(SIGHUP, SIG_DFL); + + assert(dfsan_get_labels_in_signal_conditional() == 8); + assert(x == 1); + // CHECK: Label 4 used as condition + if (x != 0) { + z = 123; + } + // Flush should clear the conditional signals seen. + dfsan_flush(); + assert(dfsan_get_labels_in_signal_conditional() == 0); + return 0; +} + +#endif // #ifdef CALLBACKS diff --git a/compiler-rt/test/dfsan/origin_conditional_callbacks.c b/compiler-rt/test/dfsan/origin_conditional_callbacks.c new file mode 100644 --- /dev/null +++ b/compiler-rt/test/dfsan/origin_conditional_callbacks.c @@ -0,0 +1,106 @@ +// RUN: %clang_dfsan -fno-sanitize=dataflow -O2 -fPIE -DCALLBACKS -c %s -o %t-callbacks.o +// RUN: %clang_dfsan -fsanitize-ignorelist=%S/Inputs/flags_abilist.txt -O2 -mllvm -dfsan-conditional-callbacks -mllvm -dfsan-track-origins=1 %s %t-callbacks.o -o %t +// RUN: %run %t FooBarBaz 2>&1 | FileCheck %s +// +// REQUIRES: x86_64-target-arch + +// Tests that callbacks are inserted for conditionals when +// -dfsan-conditional-callbacks is specified. + +#include <assert.h> +#include <sanitizer/dfsan_interface.h> +#include <stdio.h> +#include <string.h> + +#ifdef CALLBACKS +// Compile this code without DFSan to avoid recursive instrumentation. 
+ +extern dfsan_label LabelI; +extern dfsan_label LabelJ; +extern dfsan_label LabelIJ; +extern dfsan_label LabelArgv; +extern size_t LenArgv; + +void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin) { + assert(Label != 0); + assert(Origin != 0); + + static int Count = 0; + switch (Count++) { + case 0: + assert(Label == LabelI); + break; + case 1: + assert(Label == LabelJ); + break; + case 2: + assert(Label == LabelIJ); + break; + default: + break; + } + + fprintf(stderr, "Label %u used as condition\n", Label); +} + +#else +// Compile this code with DFSan and -dfsan-conditional-callbacks to insert the +// callbacks. + +dfsan_label LabelI; +dfsan_label LabelJ; +dfsan_label LabelIJ; +dfsan_label LabelArgv; + +size_t LenArgv; + +extern void my_dfsan_conditional_callback(dfsan_label Label, dfsan_origin Origin); + +int main(int Argc, char *Argv[]) { + assert(Argc == 2); + + dfsan_set_conditional_callback(my_dfsan_conditional_callback); + + int result = 0; + // Make these not look like constants, otherwise the branch we're expecting + // may be optimized out. + int DataI = (Argv[0][0] != 0) ? 1 : 0; + int DataJ = (Argv[1][0] != 0) ? 2 : 0; + LabelI = 1; + dfsan_set_label(LabelI, &DataI, sizeof(DataI)); + LabelJ = 2; + dfsan_set_label(LabelJ, &DataJ, sizeof(DataJ)); + LabelIJ = dfsan_union(LabelI, LabelJ); + + assert(dfsan_get_label(DataI) == LabelI); + + // CHECK: Label 1 used as condition + if (DataI) { + result = 42; + } + + assert(dfsan_get_label(DataJ) == LabelJ); + + // CHECK: Label 2 used as condition + switch (DataJ) { + case 1: + result += 10000; + break; + case 2: + result += 4200; + break; + default: + break; + } + + int tainted_cond = ((DataI * DataJ) != 1); + assert(dfsan_get_label(tainted_cond) == LabelIJ); + + // CHECK: Label 3 used as condition + result = tainted_cond ? 
result + 420000 : 9; + + assert(result == 424242); + return 0; +} + +#endif // #ifdef CALLBACKS diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -208,6 +208,14 @@ cl::desc("Insert calls to __dfsan_*_callback functions on data events."), cl::Hidden, cl::init(false)); +// Experimental feature that inserts callbacks for conditionals, including: +// conditional branch, switch, select. +// This must be true for dfsan_set_conditional_callback() to have effect. +static cl::opt<bool> ClConditionalCallbacks( + "dfsan-conditional-callbacks", + cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden, + cl::init(false)); + // Controls whether the pass tracks the control flow of select instructions. static cl::opt<bool> ClTrackSelectControlFlow( "dfsan-track-select-control-flow", @@ -428,6 +436,8 @@ FunctionType *DFSanSetLabelFnTy; FunctionType *DFSanNonzeroLabelFnTy; FunctionType *DFSanVarargWrapperFnTy; + FunctionType *DFSanConditionalCallbackFnTy; + FunctionType *DFSanConditionalCallbackOriginFnTy; FunctionType *DFSanCmpCallbackFnTy; FunctionType *DFSanLoadStoreCallbackFnTy; FunctionType *DFSanMemTransferCallbackFnTy; @@ -444,6 +454,8 @@ FunctionCallee DFSanLoadCallbackFn; FunctionCallee DFSanStoreCallbackFn; FunctionCallee DFSanMemTransferCallbackFn; + FunctionCallee DFSanConditionalCallbackFn; + FunctionCallee DFSanConditionalCallbackOriginFn; FunctionCallee DFSanCmpCallbackFn; FunctionCallee DFSanChainOriginFn; FunctionCallee DFSanChainOriginIfTaintedFn; @@ -642,6 +654,10 @@ Align getShadowAlign(Align InstAlignment); + // If ClConditionalCallbacks is enabled, insert a callback before a given + // branch instruction using the given conditional expression. 
+ void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition); + private: /// Collapses the shadow with aggregate type into a single primitive shadow /// value. @@ -748,6 +764,8 @@ void visitSelectInst(SelectInst &I); void visitMemSetInst(MemSetInst &I); void visitMemTransferInst(MemTransferInst &I); + void visitBranchInst(BranchInst &BR); + void visitSwitchInst(SwitchInst &SW); private: void visitCASOrRMW(Align InstAlignment, Instruction &I); @@ -971,6 +989,22 @@ return PrimitiveShadow; } +void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I, + Value *Condition) { + if (!ClConditionalCallbacks) { + return; + } + IRBuilder<> IRB(&I); + Value *CondShadow = getShadow(Condition); + if (DFS.shouldTrackOrigins()) { + Value *CondOrigin = getOrigin(Condition); + IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn, + {CondShadow, CondOrigin}); + } else { + IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow}); + } +} + Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) { if (!OrigTy->isSized()) return PrimitiveShadowTy; @@ -1032,6 +1066,13 @@ FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false); DFSanVarargWrapperFnTy = FunctionType::get( Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false); + DFSanConditionalCallbackFnTy = + FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, + /*isVarArg=*/false); + Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy}; + DFSanConditionalCallbackOriginFnTy = FunctionType::get( + Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs, + /*isVarArg=*/false); DFSanCmpCallbackFnTy = FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy, /*isVarArg=*/false); @@ -1270,6 +1311,10 @@ DFSanStoreCallbackFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts()); + DFSanRuntimeFunctions.insert( + DFSanConditionalCallbackFn.getCallee()->stripPointerCasts()); + 
 DFSanRuntimeFunctions.insert( + DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( DFSanCmpCallbackFn.getCallee()->stripPointerCasts()); DFSanRuntimeFunctions.insert( @@ -1292,6 +1337,12 @@ "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy); DFSanCmpCallbackFn = Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy); + + DFSanConditionalCallbackFn = Mod->getOrInsertFunction( + "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy); + DFSanConditionalCallbackOriginFn = + Mod->getOrInsertFunction("__dfsan_conditional_callback_origin", + DFSanConditionalCallbackOriginFnTy); } void DataFlowSanitizer::injectMetadataGlobals(Module &M) { @@ -2593,6 +2644,8 @@ Value *FalseOrigin = ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr; + DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition()); + if (isa<VectorType>(I.getCondition()->getType())) { ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow, FalseShadow, &I); @@ -2683,6 +2736,17 @@ } } +void DFSanVisitor::visitBranchInst(BranchInst &BR) { + if (!BR.isConditional()) + return; + + DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition()); +} + +void DFSanVisitor::visitSwitchInst(SwitchInst &SW) { + DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition()); +} + static bool isAMustTailRetVal(Value *RetVal) { // Tail call may have a bitcast between return. if (auto *I = dyn_cast<BitCastInst>(RetVal)) {