diff --git a/compiler-rt/test/dfsan/lookup_table.c b/compiler-rt/test/dfsan/lookup_table.c
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/dfsan/lookup_table.c
@@ -0,0 +1,68 @@
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -mllvm -dfsan-combine-pointer-labels-on-load=false -mllvm -dfsan-combine-taint-lookup-table=remap_to_upper -DLOOKUP_TABLE -o %t && %run %t
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -mllvm -dfsan-combine-pointer-labels-on-load=false -mllvm -dfsan-combine-taint-lookup-table=no_match -o %t && %run %t
+// RUN: %clang_dfsan %s -mllvm -dfsan-combine-offset-labels-on-gep=false -mllvm -dfsan-combine-pointer-labels-on-load=false -o %t && %run %t
+//
+// REQUIRES: x86_64-target-arch
+//
+// Checks that naming a constant table in -dfsan-combine-taint-lookup-table
+// makes a load from that table carry the label of its index even though GEP
+// offset and pointer label combining are both disabled.
+
+#include <assert.h>
+#include <sanitizer/dfsan_interface.h>
+
+// Maps 'a'-'z' to 'A'-'Z'; every other index maps to '.'.
+const char remap_to_upper[256] = {
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', 'A', 'B',
+    'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K',
+    'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+    'U', 'V', 'W', 'X', 'Y', 'Z', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.', '.', '.', '.', '.', '.',
+    '.', '.', '.', '.',
+};
+
+// Returns the remap_to_upper entry selected by c.
+char character_mapping(unsigned char c) {
+  return remap_to_upper[c];
+}
+
+int main(void) {
+  char a = 'b';
+  dfsan_label i_label = 1;
+  dfsan_set_label(i_label, &a, sizeof(a));
+  assert(dfsan_read_label(&a, sizeof(a)) == i_label);
+
+  char b = character_mapping(a);
+  assert(b == 'B');
+
+  // b keeps a's label only in the run that names the table (LOOKUP_TABLE).
+#ifdef LOOKUP_TABLE
+  assert(dfsan_read_label(&b, sizeof(b)) == i_label);
+#else
+  assert(dfsan_read_label(&b, sizeof(b)) == 0);
+#endif
+  return 0;
+}
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -67,6 +67,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -183,6 +184,15 @@
         "doing pointer arithmetic."),
     cl::Hidden, cl::init(true));
 
+static cl::list<std::string> ClCombineTaintLookupTables(
+    "dfsan-combine-taint-lookup-table",
+    cl::desc(
+        "When dfsan-combine-offset-labels-on-gep and/or "
+        "dfsan-combine-pointer-labels-on-load are false, this flag can "
+        "be used to re-enable combining offset and/or pointer taint when "
+        "loading specific constant global variables (i.e. lookup tables)."),
+    cl::Hidden);
+
 static cl::opt<bool> ClDebugNonzeroLabels(
     "dfsan-debug-nonzero-labels",
     cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
@@ -463,6 +473,7 @@
   DFSanABIList ABIList;
   DenseMap<Value *, Function *> UnwrappedFnMap;
   AttributeMask ReadOnlyNoneAttrs;
+  StringSet<> CombineTaintLookupTableNames;
 
   /// Memory map parameters used in calculation mapping application addresses
   /// to shadow addresses and origin addresses.
@@ -652,6 +663,10 @@
   // branch instruction using the given conditional expression.
   void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);
 
+  /// Returns true if \p P, after stripping pointer casts, is a named constant
+  /// global variable whose name was given to -dfsan-combine-taint-lookup-table.
+  bool isLookupTableConstant(Value *P);
+
 private:
   /// Collapses the shadow with aggregate type into a single primitive shadow
   /// value.
@@ -786,6 +801,10 @@
   // FIXME: should we propagate vfs::FileSystem to this constructor?
   ABIList.set(
       SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
+
+  // Copy the table names from the command line into a set keyed by name.
+  for (StringRef Name : ClCombineTaintLookupTables)
+    CombineTaintLookupTableNames.insert(Name);
 }
 
 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
@@ -1831,6 +1850,14 @@
   return Align(std::max(MinOriginAlignment, Alignment));
 }
 
+bool DFSanFunction::isLookupTableConstant(Value *P) {
+  if (auto *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
+    if (GV->isConstant() && GV->hasName())
+      return DFS.CombineTaintLookupTableNames.count(GV->getName());
+
+  return false;
+}
+
 bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
                                                   Align InstAlignment) {
   // When enabling tracking load instructions, we always use
@@ -2084,6 +2111,32 @@
   llvm_unreachable("Unknown ordering");
 }
 
+/// Walks up from \p V through GEPs, bitcasts and global aliases and returns
+/// the value it ends on. Non-pointer values are returned unchanged, and the
+/// walk stops as soon as a bitcast yields a non-pointer operand.
+static Value *StripPointerGEPsAndCasts(Value *V) {
+  if (!V->getType()->isPointerTy())
+    return V;
+
+  // Stop when a step leaves V unchanged. The seen set also guards against
+  // cycles, although the pass should only ever run on valid IR.
+  SmallPtrSet<const Value *, 4> Visited;
+  Visited.insert(V);
+  do {
+    if (auto *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+      if (!V->getType()->isPointerTy())
+        return V;
+    } else if (isa<GlobalAlias>(V)) {
+      V = cast<GlobalAlias>(V)->getAliasee();
+    }
+  } while (Visited.insert(V).second);
+
+  return V;
+}
+
 void DFSanVisitor::visitLoadInst(LoadInst &LI) {
   auto &DL = LI.getModule()->getDataLayout();
   uint64_t Size = DL.getTypeStoreSize(LI.getType());
@@ -2112,7 +2165,9 @@
     Shadows.push_back(PrimitiveShadow);
     Origins.push_back(Origin);
   }
-  if (ClCombinePointerLabelsOnLoad) {
+  if (ClCombinePointerLabelsOnLoad ||
+      DFSF.isLookupTableConstant(
+          StripPointerGEPsAndCasts(LI.getPointerOperand()))) {
     Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
     PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
     if (ShouldTrackOrigins) {
@@ -2474,7 +2529,9 @@
 }
 
 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
-  if (ClCombineOffsetLabelsOnGEP) {
+  if (ClCombineOffsetLabelsOnGEP ||
+      DFSF.isLookupTableConstant(
+          StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) {
     visitInstOperands(GEPI);
     return;
   }
diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll b/llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/lookup_table.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-load=false -dfsan-combine-offset-labels-on-gep=false -dfsan-combine-taint-lookup-table=lookup_table_a -S | FileCheck %s --check-prefixes=CHECK,LOOKUP_A
+; RUN: opt < %s -dfsan -dfsan-combine-pointer-labels-on-load=false -dfsan-combine-offset-labels-on-gep=false -S | FileCheck %s --check-prefixes=CHECK,NO_LOOKUP_A
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @__dfsan_arg_tls = external thread_local(initialexec) global [[TLS_ARR:\[100 x i64\]]]
+; CHECK: @__dfsan_retval_tls = external thread_local(initialexec) global [[TLS_ARR]]
+; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
+; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
+
+@lookup_table_a = external local_unnamed_addr constant [256 x i8], align 16
+@lookup_table_b = external local_unnamed_addr constant [256 x i8], align 16
+
+define i8 @load_lookup_table_a(i8 %p) {
+  ; CHECK-LABEL: @load_lookup_table_a.dfsan
+  ; CHECK-NEXT: %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN:2]]
+  ; CHECK-NEXT: %c = zext i8 %p to i64
+  ; CHECK-NEXT: %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_a, i64 0, i64 %c
+  ; CHECK-NEXT: %a = load i8, i8* %b, align 1
+  ; Propagates p shadow when lookup_table_a flag is provided, otherwise propagates 0 shadow
+  ; LOOKUP_A-NEXT: store i[[#SBITS]] %[[#PS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
+  ; NO_LOOKUP_A-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
+  ; CHECK-NEXT: ret i8 %a
+
+  %c = zext i8 %p to i64
+  %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_a, i64 0, i64 %c
+  %a = load i8, i8* %b
+  ret i8 %a
+}
+
+define i8 @load_lookup_table_b(i8 %p) {
+  ; CHECK-LABEL: @load_lookup_table_b.dfsan
+  ; CHECK-NEXT: %[[#PS:]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[ALIGN:2]]
+  ; CHECK-NEXT: %c = zext i8 %p to i64
+  ; CHECK-NEXT: %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_b, i64 0, i64 %c
+  ; CHECK-NEXT: %a = load i8, i8* %b, align 1
+  ; Neither run names lookup_table_b in the flag, so both propagate 0 shadow
+  ; CHECK-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[ALIGN]]
+  ; CHECK-NEXT: ret i8 %a
+
+  %c = zext i8 %p to i64
+  %b = getelementptr inbounds [256 x i8], [256 x i8]* @lookup_table_b, i64 0, i64 %c
+  %a = load i8, i8* %b, align 1
+  ret i8 %a
+}