Index: compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp =================================================================== --- compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp +++ compiler-rt/trunk/lib/fuzzer/dataflow/DataFlow.cpp @@ -63,6 +63,9 @@ } // extern "C" static size_t InputLen; +static size_t InputLabelBeg; +static size_t InputLabelEnd; +static size_t InputSizeLabel; static size_t NumFuncs; static const uintptr_t *FuncsBeg; static __thread size_t CurrentFunc; @@ -95,8 +98,10 @@ return; LabelSeen[L] = true; assert(L); - if (L <= InputLen + 1) { - Bytes[L - 1] = '1'; + if (L < InputSizeLabel) { + Bytes[L + InputLabelBeg - 1] = '1'; + } else if (L == InputSizeLabel) { + Bytes[InputLen] = '1'; } else { auto *DLI = dfsan_get_label_info(L); SetBytesForLabel(DLI->l1, Bytes); @@ -124,9 +129,9 @@ if (argc == 1) return PrintFunctions(); assert(argc == 4 || argc == 5); - size_t Beg = atoi(argv[1]); - size_t End = atoi(argv[2]); - assert(Beg < End); + InputLabelBeg = atoi(argv[1]); + InputLabelEnd = atoi(argv[2]); + assert(InputLabelBeg < InputLabelEnd); const char *Input = argv[3]; fprintf(stderr, "INFO: reading '%s'\n", Input); @@ -143,14 +148,16 @@ fprintf(stderr, "INFO: running '%s'\n", Input); for (size_t I = 1; I <= InputLen; I++) { - dfsan_label L = dfsan_create_label("", nullptr); - assert(L == I); size_t Idx = I - 1; - if (Idx >= Beg && Idx < End) + if (Idx >= InputLabelBeg && Idx < InputLabelEnd) { + dfsan_label L = dfsan_create_label("", nullptr); + assert(L == I - InputLabelBeg); dfsan_set_label(L, Buf + Idx, 1); + } } dfsan_label SizeL = dfsan_create_label("", nullptr); - assert(SizeL == InputLen + 1); + InputSizeLabel = SizeL; + assert(InputSizeLabel == InputLabelEnd - InputLabelBeg + 1); dfsan_set_label(SizeL, &InputLen, sizeof(InputLen)); LLVMFuzzerTestOneInput(Buf, InputLen); Index: compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py =================================================================== --- compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py +++ compiler-rt/trunk/lib/fuzzer/scripts/collect_data_flow.py @@ -65,8 +65,8 @@ tmpfile = os.path.join(tmpdir, str(r[0]) + "-" + str(r[1])) ret = subprocess.call([exe, str(r[0]), str(r[1]), inp, tmpfile]) if ret and r[1] - r[0] >= 2: - q.append([r[0], (r[1] + r[0]) / 2]) - q.append([(r[1] + r[0]) / 2, r[1]]) + q.append([r[0], (r[1] + r[0]) // 2]) + q.append([(r[1] + r[0]) // 2, r[1]]) else: outputs.append(tmpfile) print("******* Success: ", r) Index: compiler-rt/trunk/test/fuzzer/dataflow.test =================================================================== --- compiler-rt/trunk/test/fuzzer/dataflow.test +++ compiler-rt/trunk/test/fuzzer/dataflow.test @@ -82,3 +82,14 @@ USE_DATA_FLOW_TRACE-DAG: a8eefe2fd5d6b32028f355fafa3e739a6bf5edc => |000001| USE_DATA_FLOW_TRACE-DGA: d28cb407e8e1a702c72d25473f0553d3ec172262 => |0000011| USE_DATA_FLOW_TRACE: INFO: DataFlowTrace: 6 trace files, 3 functions, 2 traces with focus function + +# Test that we can run collect_data_flow on a long input (>2**16 bytes) +RUN: rm -rf %t/OUT +RUN: printf "%0.sA" {1..150001} > %t/IN/very_long_input +RUN: %libfuzzer_src/scripts/collect_data_flow.py %t-ThreeFunctionsTestDF %t/IN/very_long_input %t/OUT | FileCheck %s --check-prefix=COLLECT_TRACE_FOR_LONG_INPUT +RUN: rm %t/IN/very_long_input +COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[0, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[75000, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Trying:{{[ ]+}}[112500, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[{{[0123456789]+}}, 150001] +COLLECT_TRACE_FOR_LONG_INPUT: ******* Success:{{[ ]+}}[0, {{[0123456789]+}}]