Diff 484052

libc/fuzzing/math/Compare.h

Show All 22 Lines	ValuesEqual(T x1, T x2) {

// For all other values, we want the values to be bitwise equal.		// For all other values, we want the values to be bitwise equal.
return bits1.uintval() == bits2.uintval();		return bits1.uintval() == bits2.uintval();
}		}

template <typename T>		template <typename T>
__llvm_libc::cpp::enable_if_t<__llvm_libc::cpp::is_integral_v<T>, bool>		__llvm_libc::cpp::enable_if_t<__llvm_libc::cpp::is_integral_v<T>, bool>
ValuesEqual(T x1, T x2) {		ValuesEqual(T x1, T x2) {
return x1 == x1;		return x1 == x2;
}		}

#endif // LLVM_LIBC_FUZZING_MATH_COMPARE_H		#endif // LLVM_LIBC_FUZZING_MATH_COMPARE_H

libc/fuzzing/stdlib/CMakeLists.txt

Show All 10 Lines	add_libc_fuzzer(
SRCS		SRCS
atof_differential_fuzz.cpp		atof_differential_fuzz.cpp
HDRS		HDRS
StringParserOutputDiff.h		StringParserOutputDiff.h
DEPENDS		DEPENDS
libc.src.stdlib.atof		libc.src.stdlib.atof
)		)

		add_libc_fuzzer(
		strtointeger_differential_fuzz
		SRCS
		strtointeger_differential_fuzz.cpp
		HDRS
		StringParserOutputDiff.h
		DEPENDS
		libc.src.stdlib.atoi
		libc.src.stdlib.atol
		libc.src.stdlib.atoll
		libc.src.stdlib.strtol
		libc.src.stdlib.strtoll
		libc.src.stdlib.strtoul
		libc.src.stdlib.strtoull
		)

libc/fuzzing/stdlib/StringParserOutputDiff.h

Show All 26 Lines	void StringParserOutputDiff(StringInputSingleOutputFunc<T> func1,

T result1 = func1(x);		T result1 = func1(x);
T result2 = func2(x);		T result2 = func2(x);

if (!ValuesEqual(result1, result2))		if (!ValuesEqual(result1, result2))
__builtin_trap();		__builtin_trap();
}		}

		// Pointer to a strto*-style conversion function: takes the input string, an
		// out-parameter that receives the end-of-parse pointer, and the numeric base,
		// and returns the converted value.
		template <typename T>
		using StringToNumberFunc = T (*)(const char *, char **, int);

		// Differential check for two strto*-style functions.
		//
		// The first byte of `data` selects the base and the bytes after it are handed
		// to both functions as the string to parse. The process traps if the two
		// functions disagree on either the returned value or the end pointer.
		//
		// NOTE(review): `size` is only used for the early-out below; the string is
		// passed as a plain C string, so the caller is presumably expected to supply
		// a null-terminated buffer -- confirm against callers.
		template <typename T>
		void StringToNumberOutputDiff(StringToNumberFunc<T> func1,
		                              StringToNumberFunc<T> func2, const uint8_t *data,
		                              size_t size) {
		  if (size < sizeof(T))
		    return;

		  // Skip the base byte; everything after it is the string under test.
		  const char *x = reinterpret_cast<const char *>(data + 1);

		  // Map the first byte onto {0, 2, 3, ..., 36}: `% 36` yields 0..35, and every
		  // non-zero result is shifted up by one so that 1 is never produced.
		  int base = data[0] % 36;
		  base = base + ((base == 0) ? 0 : 1);

		  char *outPtr1 = nullptr;
		  char *outPtr2 = nullptr;

		  T result1 = func1(x, &outPtr1, base);
		  T result2 = func2(x, &outPtr2, base);

		  // Both the parsed value and the position where parsing stopped must match.
		  if (!(ValuesEqual(result1, result2) && (outPtr1 == outPtr2)))
		    __builtin_trap();
		}

#endif // LLVM_LIBC_FUZZING_STDLIB_STRING_PARSER_OUTPUT_DIFF_H		#endif // LLVM_LIBC_FUZZING_STDLIB_STRING_PARSER_OUTPUT_DIFF_H

libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp

This file was added.

				//===-- strtointeger_differential_fuzz.cpp --------------------------------===//
				//
				// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				// See https://llvm.org/LICENSE.txt for license information.
				// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				//
				//===----------------------------------------------------------------------===//
				///
				/// Fuzzing test for llvm-libc string to integer implementations.
				///
				//===----------------------------------------------------------------------===//
				#include "src/stdlib/atoi.h"
				#include "src/stdlib/atol.h"
				#include "src/stdlib/atoll.h"
				#include "src/stdlib/strtol.h"
				#include "src/stdlib/strtoll.h"
				#include "src/stdlib/strtoul.h"
				#include "src/stdlib/strtoull.h"
				#include <stddef.h>
				#include <stdint.h>
				#include <stdlib.h>

				#include "fuzzing/stdlib/StringParserOutputDiff.h"

				// This list contains (almost) all characters that can possibly be accepted by a
				// string to integer conversion. Those are: space, tab, any digit, and any
				// letter. Technically there are some space characters accepted by isspace that
				// aren't in this list, but since space characters are just skipped over anyways
				// I'm not really worried.
				constexpr char VALID_CHARS[] = {
				' ', '\t', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a',
				sivachandraUnsubmitted Done Reply Inline Actions `constexpr` and `VALID_CHARS`. sivachandra: `constexpr` and `VALID_CHARS`.
				'A', 'b', 'B', 'c', 'C', 'd', 'D', 'e', 'E', 'f', 'F', 'g', 'G',
				'h', 'H', 'i', 'I', 'j', 'J', 'k', 'K', 'l', 'L', 'm', 'M', 'n',
				'N', 'o', 'O', 'p', 'P', 'q', 'Q', 'r', 'R', 's', 'S', 't', 'T',
				'u', 'U', 'v', 'V', 'w', 'W', 'x', 'X', 'y', 'Y', 'z', 'Z'};

				// This takes the randomized bytes in data and interprets the first byte as the
				// base for the string to integer conversion and the rest of them as a string to
				sivachandraUnsubmitted Done Reply Inline Actions Add a comment briefly explaining the fuzz strategy. Like, give the information that the first byte of `data` will be treated as the target base for `strto*` functions. sivachandra: Add a comment briefly explaining the fuzz strategy. Like, give the information that the first…
				// be passed to the string to integer conversion. For most of the tests the
				// string is modified so that it's only made of characters that the string to
				// integer functions could accept. This is because every other character is
				// effectively identical, and will be treated as the end of the integer. For the
				// fully randomized string this gives a greater than 50% chance for each
				// character to end the string, making the odds of getting long numbers very
				// low.

				extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
				uint8_t *container = new uint8_t[size + 1];
				// Cleaner is a cleaner input for string to integer functions. It is much more
				sivachandraUnsubmitted Done Reply Inline Actions Why should we make the input clean? It defeats the purpose of fuzzing in a way. Since this is a differential fuzz setup, I am not really worried about garbage. Also, valid ASCII characters make half of the `uint8_t` range anyway. So, we don't have to worry about most of the `data` inputs being just invalid numbers. Also, the above list of valid chars does not have the `+` and `-` signs. sivachandra: Why should we make the input clean? It defeats the purpose of fuzzing in a way. Since this is a…
				michaelrjAuthorUnsubmitted Done Reply Inline Actions The reason I want to clean the input is because otherwise almost all of the inputs won't actually exercise the function at all. If the first character is a question mark (for example), then the function won't actually parse anything and it will return 0. Testing that isn't bad, but given that more than half of the ascii range falls into that category there will be a lot of cycles spent on those obvious cases instead of actually testing the branches. michaelrj: The reason I want to clean the input is because otherwise almost all of the inputs won't…
				sivachandraUnsubmitted Done Reply Inline Actions Your argument is fair. My point is that it might not help in exercising all code paths. Take decimal numbers for example. The set of valid characters is only 10 out of the 62 there are in `VALID_CHARS` now. But, there are more valid numbers as you progress to higher bases. For base 36, I think there are no invalid inputs at all? You can choose to implement two modes - one for human testing and another for continuous testing. For human testing, clean the input like you do in this patch currently. For continuous testing, don't clean the input. You will have to add two targets with a distinguishing pre-processor macro. You should add `-` and `+` to the set of valid chars. Or, you can prepend a `-` and `+` to each clean input in the human mode and test again. sivachandra: Your argument is fair. My point is that it might not help in exercising all code paths. Take…
				michaelrjAuthorUnsubmitted Done Reply Inline Actions this sounds like a good compromise to me. michaelrj: this sounds like a good compromise to me.
				// likely to actually be a value with some sort of conversion.
				uint8_t *cleaner = new uint8_t[size + 1];
				lntueUnsubmitted Not Done Reply Inline Actions It looks like `cleaner` is not used anymore and does not get deleted afterward? lntue: It looks like `cleaner` is not used anymore and does not get deleted afterward?
				if (!container || !cleaner)
				__builtin_trap();
				size_t i;
				sivachandraUnsubmitted Done Reply Inline Actions should sivachandra: should

				for (i = 0; i < size; ++i) {
				container[i] = data[i];
				cleaner[i] = VALID_CHARS[data[i] % sizeof(VALID_CHARS)];
				}
				container[size] = '\0'; // Add null terminator to container.
				cleaner[size] = '\0';
				cleaner[0] = container[0]; // the first character is interpreted as the base,
				// so it should be fully random.

				// Check container with one pass, but most of the time it will just return 0.
				StringToNumberOutputDiff<long>(&__llvm_libc::strtol, &::strtol, container,
				size);

				StringParserOutputDiff<int>(&__llvm_libc::atoi, &::atoi, cleaner, size);
				StringParserOutputDiff<long>(&__llvm_libc::atol, &::atol, cleaner, size);
				StringParserOutputDiff<long long>(&__llvm_libc::atoll, &::atoll, cleaner,
				size);

				StringToNumberOutputDiff<long>(&__llvm_libc::strtol, &::strtol, cleaner,
				size);
				StringToNumberOutputDiff<long long>(&__llvm_libc::strtoll, &::strtoll,
				cleaner, size);

				StringToNumberOutputDiff<unsigned long>(&__llvm_libc::strtoul, &::strtoul,
				cleaner, size);
				StringToNumberOutputDiff<unsigned long long>(&__llvm_libc::strtoull,
				&::strtoull, cleaner, size);

				delete[] container;
				delete[] cleaner;
				return 0;
				}

libc/src/stdlib/atoi.cpp

	//===-- Implementation of atoi --------------------------------------------===//			//===-- Implementation of atoi --------------------------------------------===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "src/stdlib/atoi.h"			#include "src/stdlib/atoi.h"
	#include "src/__support/common.h"			#include "src/__support/common.h"
	#include "src/__support/str_to_integer.h"			#include "src/__support/str_to_integer.h"

	namespace __llvm_libc {			namespace __llvm_libc {

	LLVM_LIBC_FUNCTION(int, atoi, (const char *str)) {			LLVM_LIBC_FUNCTION(int, atoi, (const char *str)) {
	auto result = internal::strtointeger<int>(str, 10);			auto result = internal::strtointeger<long>(str, 10);
				sivachandraUnsubmitted Done Reply Inline Actions I think this change is trying to match the standard prescribed behavior. Can you please do this separately and add unit tests with inputs which are affected by this behavior if any? sivachandra: I think this change is trying to match the standard prescribed behavior. Can you please do this…
				michaelrjAuthorUnsubmitted Done Reply Inline Actions I've uploaded a separate patch here: https://reviews.llvm.org/D140350 michaelrj: I've uploaded a separate patch here: https://reviews.llvm.org/D140350
	if (result.has_error())			if (result.has_error())
	errno = result.error;			errno = result.error;

	return result;			return static_cast<int>(result);
	}			}

	} // namespace __llvm_libc			} // namespace __llvm_libc

This is an archive of the discontinued LLVM Phabricator instance.

[libc] add fuzz target for strtointeger functions
ClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 484052

libc/fuzzing/math/Compare.h

libc/fuzzing/stdlib/CMakeLists.txt

libc/fuzzing/stdlib/StringParserOutputDiff.h

libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp

libc/src/stdlib/atoi.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[libc] add fuzz target for strtointeger functionsClosedPublic

Details

Diff Detail

Event Timeline

Revision Contents

Diff 484052

libc/fuzzing/math/Compare.h

libc/fuzzing/stdlib/CMakeLists.txt

libc/fuzzing/stdlib/StringParserOutputDiff.h

libc/fuzzing/stdlib/strtointeger_differential_fuzz.cpp

libc/src/stdlib/atoi.cpp

[libc] add fuzz target for strtointeger functions
ClosedPublic