diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def
--- a/clang/include/clang/Basic/Features.def
+++ b/clang/include/clang/Basic/Features.def
@@ -92,6 +92,7 @@
 FEATURE(thread_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Thread))
 FEATURE(dataflow_sanitizer, LangOpts.Sanitize.has(SanitizerKind::DataFlow))
 FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo))
+FEATURE(numericalstability_sanitizer, LangOpts.Sanitize.has(SanitizerKind::NumericalStability))
 // Objective-C features
 FEATURE(objc_arr, LangOpts.ObjCAutoRefCount) // FIXME: REMOVE?
 FEATURE(objc_arc, LangOpts.ObjCAutoRefCount)
diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def
--- a/clang/include/clang/Basic/Sanitizers.def
+++ b/clang/include/clang/Basic/Sanitizers.def
@@ -73,6 +73,9 @@
 // ThreadSanitizer
 SANITIZER("thread", Thread)
 
+// Numerical stability sanitizer.
+SANITIZER("numerical", NumericalStability)
+
 // LeakSanitizer
 SANITIZER("leak", Leak)
 
diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h
--- a/clang/include/clang/Driver/SanitizerArgs.h
+++ b/clang/include/clang/Driver/SanitizerArgs.h
@@ -86,6 +86,7 @@
   bool needsCfiDiagRt() const;
   bool needsStatsRt() const { return Stats; }
   bool needsScudoRt() const { return Sanitizers.has(SanitizerKind::Scudo); }
+  bool needsNsanRt() const { return Sanitizers.has(SanitizerKind::NumericalStability); }
 
   bool requiresPIE() const;
   bool needsUnwindTables() const;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -72,6 +72,7 @@
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
 #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
 #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
 #include "llvm/Transforms/ObjCARC.h"
@@ -359,6 +360,11 @@
   PM.add(createThreadSanitizerLegacyPassPass());
 }
 
+static void addNumericalStabilitySanitizerPass(const PassManagerBuilder &Builder,
+                                               legacy::PassManagerBase &PM) {
+  PM.add(createNumericalStabilitySanitizerLegacyPassPass());
+}
+
 static void addDataFlowSanitizerPass(const PassManagerBuilder &Builder,
                                      legacy::PassManagerBase &PM) {
   const PassManagerBuilderWrapper &BuilderWrapper =
@@ -776,6 +782,13 @@
                            addThreadSanitizerPass);
   }
 
+  if (LangOpts.Sanitize.has(SanitizerKind::NumericalStability)) {
+    PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+                           addNumericalStabilitySanitizerPass);
+    PMBuilder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+                           addNumericalStabilitySanitizerPass);
+  }
+
   if (LangOpts.Sanitize.has(SanitizerKind::DataFlow)) {
     PMBuilder.addExtension(PassManagerBuilder::EP_OptimizerLast,
                            addDataFlowSanitizerPass);
@@ -1107,6 +1120,10 @@
           MPM.addPass(createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
         }
 
+        if (LangOpts.Sanitize.has(SanitizerKind::NumericalStability)) {
+          MPM.addPass(NumericalStabilitySanitizerPass());
+        }
+
         auto ASanPass = [&](SanitizerMask Mask, bool CompileKernel) {
           if (LangOpts.Sanitize.has(Mask)) {
             bool Recover = CodeGenOpts.SanitizeRecover.has(Mask);
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
---
a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -411,6 +411,10 @@ !isInNoSanitizeList(SanitizerKind::Thread, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (getLangOpts().Sanitize.has(SanitizerKind::NumericalStability) && + !isInNoSanitizeList(SanitizerKind::NumericalStability, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeNumericalStability); + if (getLangOpts().Sanitize.has(SanitizerKind::Memory) && !isInNoSanitizeList(SanitizerKind::Memory, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -751,6 +751,8 @@ Fn->addFnAttr(llvm::Attribute::SanitizeMemTag); if (SanOpts.has(SanitizerKind::Thread)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (SanOpts.has(SanitizerKind::NumericalStability)) + Fn->addFnAttr(llvm::Attribute::SanitizeNumericalStability); if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); if (SanOpts.has(SanitizerKind::SafeStack)) diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -38,7 +38,7 @@ SanitizerKind::DataFlow | SanitizerKind::HWAddress | SanitizerKind::Scudo; static const SanitizerMask NeedsUnwindTables = SanitizerKind::Address | SanitizerKind::HWAddress | SanitizerKind::Thread | - SanitizerKind::Memory | SanitizerKind::DataFlow; + SanitizerKind::Memory | SanitizerKind::DataFlow | SanitizerKind::NumericalStability; static const SanitizerMask SupportsCoverage = SanitizerKind::Address | SanitizerKind::HWAddress | SanitizerKind::KernelAddress | SanitizerKind::KernelHWAddress | @@ -49,7 +49,7 @@ SanitizerKind::DataFlow | SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero | SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack | - SanitizerKind::Thread | SanitizerKind::ObjCCast; + SanitizerKind::Thread | SanitizerKind::ObjCCast | SanitizerKind::NumericalStability; static const SanitizerMask RecoverableByDefault = SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | @@ -143,6 +143,7 @@ {"memtag_blacklist.txt", SanitizerKind::MemTag}, {"msan_blacklist.txt", SanitizerKind::Memory}, {"tsan_blacklist.txt", SanitizerKind::Thread}, + {"nsan_blacklist.txt", SanitizerKind::NumericalStability}, {"dfsan_abilist.txt", SanitizerKind::DataFlow}, {"cfi_blacklist.txt", SanitizerKind::CFI}, {"ubsan_blacklist.txt", diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -855,6 +855,8 @@ if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("tsan_cxx"); } + if (SanArgs.needsNsanRt() && SanArgs.linkRuntimes()) + StaticRuntimes.push_back("nsan"); if (!SanArgs.needsSharedRt() && SanArgs.needsUbsanRt() && SanArgs.linkRuntimes()) { if (SanArgs.requiresMinimalRuntime()) { StaticRuntimes.push_back("ubsan_minimal"); diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -891,8 +891,10 @@ Res |= SanitizerKind::Leak; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsPowerPC64) Res |= SanitizerKind::Thread; - 
if (IsX86_64) + if (IsX86_64) { Res |= SanitizerKind::KernelMemory; + Res |= SanitizerKind::NumericalStability; + } if (IsX86 || IsX86_64) Res |= SanitizerKind::Function; if (IsX86_64 || IsMIPS64 || IsAArch64 || IsX86 || IsMIPS || IsArmArch || diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt --- a/clang/runtime/CMakeLists.txt +++ b/clang/runtime/CMakeLists.txt @@ -115,7 +115,7 @@ COMPONENT compiler-rt) # Add top-level targets that build specific compiler-rt runtimes. - set(COMPILER_RT_RUNTIMES fuzzer asan builtins dfsan lsan msan profile tsan ubsan ubsan-minimal) + set(COMPILER_RT_RUNTIMES fuzzer asan builtins dfsan lsan msan nsan profile tsan ubsan ubsan-minimal) foreach(runtime ${COMPILER_RT_RUNTIMES}) get_ext_project_build_command(build_runtime_cmd ${runtime}) add_custom_target(${runtime} @@ -132,7 +132,7 @@ # Add top-level targets for various compiler-rt test suites. set(COMPILER_RT_TEST_SUITES check-fuzzer check-asan check-hwasan check-asan-dynamic check-dfsan - check-lsan check-msan check-sanitizer check-tsan check-ubsan check-ubsan-minimal + check-lsan check-msan check-sanitizer check-nsan check-tsan check-ubsan check-ubsan-minimal check-profile check-cfi check-cfi-and-supported check-safestack check-gwp_asan) foreach(test_suite ${COMPILER_RT_TEST_SUITES}) get_ext_project_build_command(run_test_suite ${test_suite}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -325,6 +325,7 @@ set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32} ${PPC64} ${S390X} ${RISCV64}) endif() set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) +set(ALL_NSAN_SUPPORTED_ARCH ${X86} ${X86_64}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64} @@ -551,6 +552,9 @@ list_intersect(MSAN_SUPPORTED_ARCH ALL_MSAN_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) + list_intersect(NSAN_SUPPORTED_ARCH + ALL_NSAN_SUPPORTED_ARCH + SANITIZER_COMMON_SUPPORTED_ARCH) list_intersect(HWASAN_SUPPORTED_ARCH ALL_HWASAN_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) @@ -618,6 +622,7 @@ filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH ${ALL_SHADOWCALLSTACK_SUPPORTED_ARCH}) filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH}) + filter_available_targets(NSAN_SUPPORTED_ARCH ${ALL_NSAN_SUPPORTED_ARCH}) endif() if (MSVC) @@ -640,7 +645,7 @@ endif() message(STATUS "Compiler-RT supported architectures: ${COMPILER_RT_SUPPORTED_ARCH}") -set(ALL_SANITIZERS asan;dfsan;msan;hwasan;tsan;safestack;cfi;scudo;ubsan_minimal;gwp_asan) +set(ALL_SANITIZERS asan;dfsan;msan;hwasan;tsan;safestack;cfi;scudo;ubsan_minimal;gwp_asan;nsan) set(COMPILER_RT_SANITIZERS_TO_BUILD all CACHE STRING "sanitizers to build if supported on the target (all;${ALL_SANITIZERS})") list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}") @@ -803,4 +808,11 @@ else() set(COMPILER_RT_HAS_GWP_ASAN FALSE) endif() + +if (COMPILER_RT_HAS_SANITIZER_COMMON AND NSAN_SUPPORTED_ARCH AND + OS_NAME MATCHES "Linux") + set(COMPILER_RT_HAS_NSAN TRUE) +else() + set(COMPILER_RT_HAS_NSAN FALSE) +endif() pythonize_bool(COMPILER_RT_HAS_GWP_ASAN) diff --git a/compiler-rt/include/sanitizer/nsan_interface.h b/compiler-rt/include/sanitizer/nsan_interface.h new file mode 100644 --- /dev/null +++ b/compiler-rt/include/sanitizer/nsan_interface.h @@ -0,0 
+1,75 @@
+//===-- sanitizer/nsan_interface.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Public interface for nsan.
+//
+//===----------------------------------------------------------------------===//
+#ifndef SANITIZER_NSAN_INTERFACE_H
+#define SANITIZER_NSAN_INTERFACE_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/// User-provided default option settings.
+///
+/// You can provide your own implementation of this function to return a string
+/// containing NSan runtime options (for example,
+/// verbosity=1:halt_on_error=0).
+///
+/// \returns Default options string.
+const char *__nsan_default_options(void);
+
+// Dumps nsan shadow data for a block of `size_bytes` bytes of application
+// memory at location `addr`.
+//
+// Each line contains the application address, then the shadow types, then the
+// shadow values. Unknown types are shown as `__`, while known values are shown
+// as `f`, `d`, `l` for float, double, and long double respectively. Position
+// is shown as a single hex digit. The shadow value itself appears on the line
+// that contains the first byte of the value.
+// FIXME: Show both the shadow and the application value.
+//
+// Example: `__nsan_dump_shadow_mem(addr, 32, 8, 0)` might print:
+//
+//   0x0add7359:  __ f0 f1 f2 f3 __ __ __   (42.000)
+//   0x0add7361:  __ d1 d2 d3 d4 d5 d6 d7
+//   0x0add7369:  d8 f0 f1 f2 f3 __ __ f2   (-1.000) (12.5)
+//   0x0add7371:  f3 __ __ __ __ __ __ __
+//
+// This means that there is:
+//   - a shadow double for the float at address 0x0add7360, with value 42;
+//   - a shadow float128 for the double at address 0x0add7362, with value -1;
+//   - a shadow double for the float at address 0x0add736a, with value 12.5.
+// There was also a shadow double for the float at address 0x0add736e, but
+// bytes f0 and f1 were overwritten by one or several stores, so that the
+// shadow value is no longer valid.
+// The argument `reserved` can be any value. Its true value is provided by the
+// instrumentation.
+void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                            size_t bytes_per_line, size_t reserved);
+
+// Explicitly dumps a value.
+// FIXME: vector versions?
+void __nsan_dump_float(float value);
+void __nsan_dump_double(double value);
+void __nsan_dump_longdouble(long double value);
+
+// Explicitly checks a value.
+// FIXME: vector versions?
+void __nsan_check_float(float value);
+void __nsan_check_double(double value);
+void __nsan_check_longdouble(long double value);
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // SANITIZER_NSAN_INTERFACE_H
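A minimal usage sketch for the interface above (illustrative only, not part of the patch; assumes the translation unit is compiled with -fsanitize=numerical so that shadow values exist, and uses only the hooks declared in this header — flag names come from nsan_flags.inc later in the patch):

    #include <sanitizer/nsan_interface.h>

    // Optional override of the weak hook above; parsed like NSAN_OPTIONS.
    extern "C" const char *__nsan_default_options(void) {
      return "halt_on_error=0:print_stats_on_exit=1";
    }

    double sum(const double *a, size_t n) {
      double acc = 0.0;
      for (size_t i = 0; i < n; ++i)
        acc += a[i];
      __nsan_check_double(acc); // Compare against the higher-precision shadow.
      __nsan_dump_shadow_mem((const char *)&acc, sizeof(acc), sizeof(acc), 0);
      return acc;
    }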
diff --git a/compiler-rt/lib/nsan/CMakeLists.txt b/compiler-rt/lib/nsan/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/CMakeLists.txt
@@ -0,0 +1,61 @@
+add_compiler_rt_component(nsan)
+
+include_directories(..)
+
+set(NSAN_SOURCES
+  nsan.cc
+  nsan_flags.cc
+  nsan_interceptors.cc
+  nsan_stats.cc
+  nsan_suppressions.cc
+)
+
+set(NSAN_HEADERS
+  nsan.h
+  nsan_flags.h
+  nsan_flags.inc
+  nsan_platform.h
+  nsan_stats.h
+  nsan_suppressions.h
+)
+
+set(NSAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+append_list_if(COMPILER_RT_HAS_FPIC_FLAG -fPIC NSAN_CFLAGS)
+# FIXME: consider -fno-rtti -fno-exceptions -nostdinc++ -pthread
+# -fno-omit-frame-pointer, and removing -stdlib= (unused when passing
+# -nostdinc++), e.g.:
+# string(REGEX REPLACE "-stdlib=[a-zA-Z+]*" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
+
+set(NSAN_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS})
+
+if (COMPILER_RT_HAS_NSAN)
+  foreach(arch ${NSAN_SUPPORTED_ARCH})
+    add_compiler_rt_runtime(
+      clang_rt.nsan
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${NSAN_SOURCES}
+              $<TARGET_OBJECTS:RTInterception.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
+              $<TARGET_OBJECTS:RTUbsan.${arch}>
+      ADDITIONAL_HEADERS ${NSAN_HEADERS}
+      CFLAGS ${NSAN_CFLAGS}
+      PARENT_TARGET nsan
+    )
+  endforeach()
+
+  add_compiler_rt_object_libraries(RTNsan
+    ARCHS ${NSAN_SUPPORTED_ARCH}
+    SOURCES ${NSAN_SOURCES}
+    ADDITIONAL_HEADERS ${NSAN_HEADERS}
+    CFLAGS ${NSAN_CFLAGS})
+endif()
+
+if(COMPILER_RT_INCLUDE_TESTS)
+  add_subdirectory(tests)
+endif()
diff --git a/compiler-rt/lib/nsan/nsan.h b/compiler-rt/lib/nsan/nsan.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan.h
@@ -0,0 +1,224 @@
+//===-- nsan.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+// Private NSan header.
+//===----------------------------------------------------------------------===//
+
+#ifndef NSAN_H
+#define NSAN_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+using __sanitizer::sptr;
+using __sanitizer::u16;
+using __sanitizer::uptr;
+
+#include "nsan_platform.h"
+
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+
+// Private nsan interface. Used e.g. by interceptors.
+extern "C" {
+
+// This marks the shadow type of the given block of application memory as
+// unknown.
+// printf-free (see comment in nsan_interceptors.cc).
+void __nsan_set_value_unknown(const char *addr, uptr size);
+
+// Copies annotations in the shadow memory for a block of application memory to
+// a new address. This function is used together with memory-copying functions
+// in application memory, e.g. the instrumentation inserts
+// `__nsan_copy_values(dest, src, size)` after builtin calls to
+// `memcpy(dest, src, size)`. Intercepted memcpy calls also call this function.
+// printf-free (see comment in nsan_interceptors.cc).
+void __nsan_copy_values(const char *daddr, const char *saddr, uptr size);
+
+SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char *
+__nsan_default_options();
+}
+
+namespace __nsan {
+
+extern bool NsanInitialized;
+extern bool NsanInitIsRunning;
+
+void initializeInterceptors();
+
+// See notes in nsan_platform.
+// printf-free (see comment in nsan_interceptors.cc).
+inline char *getShadowAddrFor(char *Ptr) {
+  uptr AppOffset = ((uptr)Ptr) & ShadowMask();
+  return (char *)(AppOffset * kShadowScale + ShadowAddr());
+}
+
+// printf-free (see comment in nsan_interceptors.cc).
+inline const char *getShadowAddrFor(const char *Ptr) {
+  return getShadowAddrFor(const_cast<char *>(Ptr));
+}
+
+// printf-free (see comment in nsan_interceptors.cc).
+inline unsigned char *getShadowTypeAddrFor(char *Ptr) {
+  uptr AppOffset = ((uptr)Ptr) & ShadowMask();
+  return (unsigned char *)(AppOffset + TypesAddr());
+}
+
+// printf-free (see comment in nsan_interceptors.cc).
+inline const unsigned char *getShadowTypeAddrFor(const char *Ptr) {
+  return getShadowTypeAddrFor(const_cast<char *>(Ptr));
+}
+
+// Information about value types and their shadow counterparts.
+template <typename FT> struct FTInfo {};
+
+template <> struct FTInfo<float> {
+  using orig_type = float;
+  using orig_bits_type = __sanitizer::u32;
+  using mantissa_bits_type = __sanitizer::u32;
+  using shadow_type = double;
+  static const char *kCppTypeName;
+  static constexpr unsigned kMantissaBits = 23;
+  static constexpr const int kExponentBits = 8;
+  static constexpr const int kExponentBias = 127;
+  static constexpr const int kValueType = kFloatValueType;
+  static constexpr const char kTypePattern[sizeof(float)] = {
+      static_cast<char>(kValueType | (0 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (1 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (2 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (3 << kValueSizeSizeBits)),
+  };
+  static constexpr const float kEpsilon = FLT_EPSILON;
+};
+
+template <> struct FTInfo<double> {
+  using orig_type = double;
+  using orig_bits_type = __sanitizer::u64;
+  using mantissa_bits_type = __sanitizer::u64;
+  using shadow_type = __float128;
+  static const char *kCppTypeName;
+  static constexpr unsigned kMantissaBits = 52;
+  static constexpr const int kExponentBits = 11;
+  static constexpr const int kExponentBias = 1023;
+  static constexpr const int kValueType = kDoubleValueType;
+  static constexpr char kTypePattern[sizeof(double)] = {
+      static_cast<char>(kValueType | (0 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (1 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (2 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (3 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (4 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (5 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (6 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (7 << kValueSizeSizeBits)),
+  };
+  static constexpr const double kEpsilon = DBL_EPSILON;
+};
+
+template <> struct FTInfo<long double> {
+  using orig_type = long double;
+  using mantissa_bits_type = __sanitizer::u64;
+  using shadow_type = __float128;
+  static const char *kCppTypeName;
+  static constexpr unsigned kMantissaBits = 63;
+  static constexpr const int kExponentBits = 15;
+  static constexpr const int kExponentBias = (1 << (kExponentBits - 1)) - 1;
+  static constexpr const int kValueType = kFp80ValueType;
+  static constexpr char kTypePattern[sizeof(long double)] = {
+      static_cast<char>(kValueType | (0 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (1 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (2 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (3 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (4 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (5 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (6 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (7 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (8 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (9 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (10 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (11 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (12 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (13 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (14 << kValueSizeSizeBits)),
+      static_cast<char>(kValueType | (15 << kValueSizeSizeBits)),
+  };
+  static constexpr const long double kEpsilon = LDBL_EPSILON;
+};
+
+template <> struct FTInfo<__float128> {
+  using orig_type = __float128;
+  using orig_bits_type = __uint128_t;
+  using mantissa_bits_type = __uint128_t;
+  static const char *kCppTypeName;
+  static constexpr unsigned kMantissaBits = 112;
+  static constexpr const int kExponentBits = 15;
+  static constexpr const int kExponentBias = (1 << (kExponentBits - 1)) - 1;
+};
+
+constexpr double kMaxULPDiff = INFINITY;
+
+// Helper for getULPDiff that works on bit representations.
+template <typename BT> double getULPDiffBits(BT V1Bits, BT V2Bits) {
+  // If the integer representations of two same-sign floats are subtracted
+  // then the absolute value of the result is equal to one plus the number of
+  // representable floats between them.
+  return V1Bits >= V2Bits ? V1Bits - V2Bits : V2Bits - V1Bits;
+}
+
+// Returns the number of floating-point values between V1 and V2, capped to
+// u64max. Returns 0 for (-0.0, 0.0).
+template <typename FT> double getULPDiff(FT V1, FT V2) {
+  if (V1 == V2) {
+    return 0; // Typically, -0.0 and 0.0.
+  }
+  using BT = typename FTInfo<FT>::orig_bits_type;
+  static_assert(sizeof(FT) == sizeof(BT), "not implemented");
+  static_assert(sizeof(BT) <= 8, "not implemented");
+  BT V1Bits;
+  __builtin_memcpy(&V1Bits, &V1, sizeof(BT));
+  BT V2Bits;
+  __builtin_memcpy(&V2Bits, &V2, sizeof(BT));
+  // Check whether the signs differ. IEEE-754 float types always store the
+  // sign in the most significant bit. NaNs and infinities are handled by the
+  // calling code.
+  constexpr const BT kSignMask = BT{1} << (CHAR_BIT * sizeof(BT) - 1);
+  if ((V1Bits ^ V2Bits) & kSignMask) {
+    // Signs differ. We can get the ULPs as `getULPDiff(negative_number, -0.0)
+    // + getULPDiff(0.0, positive_number)`.
+    if (V1Bits & kSignMask) {
+      return getULPDiffBits<BT>(V1Bits, kSignMask) +
+             getULPDiffBits<BT>(0, V2Bits);
+    } else {
+      return getULPDiffBits<BT>(V2Bits, kSignMask) +
+             getULPDiffBits<BT>(0, V1Bits);
+    }
+  }
+  return getULPDiffBits<BT>(V1Bits, V2Bits);
+}
+
+// FIXME: This needs more work: because there is no 80-bit integer type, we
+// have to go through __uint128_t. Therefore the assumptions about the sign
+// bit do not hold.
+template <> inline double getULPDiff(long double V1, long double V2) {
+  using BT = __uint128_t;
+  BT V1Bits = 0;
+  __builtin_memcpy(&V1Bits, &V1, sizeof(long double));
+  BT V2Bits = 0;
+  __builtin_memcpy(&V2Bits, &V2, sizeof(long double));
+  if ((V1Bits ^ V2Bits) & (BT{1} << (CHAR_BIT * sizeof(BT) - 1)))
+    return (V1 == V2) ? __sanitizer::u64{0} : kMaxULPDiff; // Signs differ.
+  // If the integer representations of two same-sign floats are subtracted
+  // then the absolute value of the result is equal to one plus the number of
+  // representable floats between them.
+  BT Diff = V1Bits >= V2Bits ? V1Bits - V2Bits : V2Bits - V1Bits;
+  return Diff >= kMaxULPDiff ? kMaxULPDiff : Diff;
+}
+
+} // end namespace __nsan
+
+#endif // NSAN_H
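To make the bit trick in getULPDiff concrete, here is a self-contained float version of the same-sign case (a sketch mirroring the logic above, not code from the patch; it assumes IEEE-754 binary32 and no NaNs or infinities, which the runtime handles separately):

    #include <stdint.h>
    #include <string.h>

    // Number of representable floats between a and b (same sign).
    static double ulpDiffFloat(float a, float b) {
      if (a == b)
        return 0; // Also covers -0.0 vs 0.0.
      uint32_t aBits, bBits;
      memcpy(&aBits, &a, sizeof(aBits));
      memcpy(&bBits, &b, sizeof(bBits));
      return aBits >= bBits ? aBits - bBits : bBits - aBits;
    }
    // Example: ulpDiffFloat(1.0f, nextafterf(1.0f, 2.0f)) == 1.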
diff --git a/compiler-rt/lib/nsan/nsan.cc b/compiler-rt/lib/nsan/nsan.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan.cc
@@ -0,0 +1,832 @@
+//===-- nsan.cc -----------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// NumericalStabilitySanitizer runtime.
+//
+// This implements:
+// - The public nsan interface (include/sanitizer/nsan_interface.h).
+// - The private nsan interface (./nsan.h).
+// - The internal instrumentation interface. These are functions emitted by
+//   the instrumentation pass:
+//   * __nsan_get_shadow_ptr_for_{float,double,longdouble}_load
+//     These return the shadow memory pointer for loading the shadow value,
+//     after checking that the types are consistent. If the types are not
+//     consistent, returns nullptr.
+//   * __nsan_get_shadow_ptr_for_{float,double,longdouble}_store
+//     Sets the shadow types appropriately and returns the shadow memory
+//     pointer for storing the shadow value.
+//   * __nsan_internal_check_{float,double,longdouble}_{d,l,q} checks the
+//     accuracy of a value against its shadow and emits a warning depending
+//     on the runtime configuration. The middle part indicates the type of
+//     the application value; the suffix (d, l, q for double, long double,
+//     __float128) indicates the type of the shadow, and depends on the
+//     instrumentation configuration.
+//   * __nsan_fcmp_fail_* emits a warning for an fcmp instruction whose
+//     corresponding shadow fcmp result differs.
+//
+//===----------------------------------------------------------------------===//
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+
+#include "nsan/nsan.h"
+#include "nsan/nsan_flags.h"
+#include "nsan/nsan_stats.h"
+#include "nsan/nsan_suppressions.h"
+
+using namespace __sanitizer;
+using namespace __nsan;
+
+static constexpr const int kMaxVectorWidth = 8;
+
+// When copying application memory, we also copy its shadow and shadow type.
+// FIXME: We could provide fixed-size versions that would nicely
+// vectorize for known sizes.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_copy_values(const char *daddr, const char *saddr, uptr size) {
+  internal_memmove((void *)getShadowTypeAddrFor(daddr),
+                   getShadowTypeAddrFor(saddr), size);
+  internal_memmove((void *)getShadowAddrFor(daddr), getShadowAddrFor(saddr),
+                   size * kShadowScale);
+}
+
+// FIXME: We could provide fixed-size versions that would nicely
+// vectorize for known sizes.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_set_value_unknown(const char *addr, uptr size) {
+  internal_memset((void *)getShadowTypeAddrFor(addr), 0, size);
+}
+
+namespace __nsan {
+
+const char *FTInfo<float>::kCppTypeName = "float";
+const char *FTInfo<double>::kCppTypeName = "double";
+const char *FTInfo<long double>::kCppTypeName = "long double";
+const char *FTInfo<__float128>::kCppTypeName = "__float128";
+
+const char FTInfo<float>::kTypePattern[sizeof(float)];
+const char FTInfo<double>::kTypePattern[sizeof(double)];
+const char FTInfo<long double>::kTypePattern[sizeof(long double)];
+
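The kTypePattern tables encode, per application byte, the value type in the low bits and the byte's position within the value above them. A sketch of the encoding (kValueSizeSizeBits and the k*ValueType constants live in nsan_platform.h, which is not part of this excerpt, so the value 2 is an assumption — it matches the `c & 0x3` / `c >> kValueSizeSizeBits` decoding later in this file):

    // Assumed from nsan_platform.h: type in the low 2 bits, position above.
    constexpr unsigned kAssumedValueSizeSizeBits = 2;

    constexpr unsigned char tagByte(unsigned char ValueType, unsigned Pos) {
      return static_cast<unsigned char>(
          ValueType | (Pos << kAssumedValueSizeSizeBits));
    }
    // A well-formed double at address p has tag bytes
    // tagByte(kDoubleValueType, 0) .. tagByte(kDoubleValueType, 7) at p[0..7],
    // which __nsan_dump_shadow_mem prints back as "d0 d1 ... d7".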
+// Helper for __nsan_dump_shadow_mem: Reads the value at address `Ptr`,
+// identified by its type id.
+template <typename ShadowFT>
+__float128 readShadowInternal(const char *Ptr) {
+  ShadowFT Shadow;
+  __builtin_memcpy(&Shadow, Ptr, sizeof(Shadow));
+  return Shadow;
+}
+
+__float128 readShadow(const char *Ptr, const char ShadowTypeId) {
+  switch (ShadowTypeId) {
+  case 'd':
+    return readShadowInternal<double>(Ptr);
+  case 'l':
+    return readShadowInternal<long double>(Ptr);
+  case 'q':
+    return readShadowInternal<__float128>(Ptr);
+  default:
+    return 0.0;
+  }
+}
+
+class Decorator : public __sanitizer::SanitizerCommonDecorator {
+public:
+  Decorator() : SanitizerCommonDecorator() {}
+  const char *Warning() { return Red(); }
+  const char *Name() { return Green(); }
+  const char *End() { return Default(); }
+};
+
+namespace {
+
+// Workaround for the fact that Printf() does not support floats.
+struct PrintBuffer {
+  char Buffer[64];
+};
+
+template <typename FT> struct FTPrinter {};
+
+template <> struct FTPrinter<double> {
+  static PrintBuffer dec(double Value) {
+    PrintBuffer Result;
+    snprintf(Result.Buffer, sizeof(Result.Buffer) - 1, "%.20f", Value);
+    return Result;
+  }
+  static PrintBuffer hex(double Value) {
+    PrintBuffer Result;
+    snprintf(Result.Buffer, sizeof(Result.Buffer) - 1, "%.20a", Value);
+    return Result;
+  }
+};
+
+template <> struct FTPrinter<float> : FTPrinter<double> {};
+
+template <> struct FTPrinter<long double> {
+  static PrintBuffer dec(long double Value) {
+    PrintBuffer Result;
+    snprintf(Result.Buffer, sizeof(Result.Buffer) - 1, "%.20Lf", Value);
+    return Result;
+  }
+  static PrintBuffer hex(long double Value) {
+    PrintBuffer Result;
+    snprintf(Result.Buffer, sizeof(Result.Buffer) - 1, "%.20La", Value);
+    return Result;
+  }
+};
+
+// FIXME: print with full precision.
+template <> struct FTPrinter<__float128> : FTPrinter<long double> {};
+
+// This is a template so that there are no implicit conversions.
+template <typename FT> inline FT ftAbs(FT V);
+
+template <> inline long double ftAbs(long double V) { return fabsl(V); }
+template <> inline double ftAbs(double V) { return fabs(V); }
+
+// We don't care about nans.
+// std::abs(__float128) code is suboptimal and generates a function call to
+// __getf2().
+template <typename FT> inline FT ftAbs(FT V) { return V >= FT{0} ? V : -V; }
+
+template <typename FT1, typename FT2, bool>
+struct LargestFTImpl {
+  using type = FT2;
+};
+
+template <typename FT1, typename FT2>
+struct LargestFTImpl<FT1, FT2, true> {
+  using type = FT1;
+};
+
+template <typename FT1, typename FT2>
+using LargestFT =
+    typename LargestFTImpl<FT1, FT2, (sizeof(FT1) > sizeof(FT2))>::type;
+
+template <typename T> T max(T a, T b) { return a < b ? b : a; }
+
+} // end anonymous namespace
+
+} // end namespace __nsan
+
+void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp,
+                                                 void *context,
+                                                 bool request_fast,
+                                                 u32 max_depth) {
+  using namespace __nsan;
+  return Unwind(max_depth, pc, bp, context, 0, 0, false);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_print_accumulated_stats() {
+  if (nsan_stats)
+    nsan_stats->print();
+}
+
+static void nsanAtexit() {
+  Printf("Numerical Sanitizer exit stats:\n");
+  __nsan_print_accumulated_stats();
+  nsan_stats = nullptr;
+}
+
+// The next three functions return a pointer for storing a shadow value for
+// `n` values, after setting the shadow types. We return the pointer instead
+// of storing ourselves because it avoids having to rely on the calling
+// convention around long double being the same for nsan and the target
+// application. We have to have 3 versions because we need to know which type
+// we are storing since we are setting the type shadow memory.
+template <typename FT>
+static char *getShadowPtrForStore(char *StoreAddr, uptr N) {
+  unsigned char *ShadowType = getShadowTypeAddrFor(StoreAddr);
+  for (uptr I = 0; I < N; ++I) {
+    __builtin_memcpy(ShadowType + I * sizeof(FT), FTInfo<FT>::kTypePattern,
+                     sizeof(FTInfo<FT>::kTypePattern));
+  }
+  return getShadowAddrFor(StoreAddr);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE char *
+__nsan_get_shadow_ptr_for_float_store(char *store_addr, uptr n) {
+  return getShadowPtrForStore<float>(store_addr, n);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE char *
+__nsan_get_shadow_ptr_for_double_store(char *store_addr, uptr n) {
+  return getShadowPtrForStore<double>(store_addr, n);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE char *
+__nsan_get_shadow_ptr_for_longdouble_store(char *store_addr, uptr n) {
+  return getShadowPtrForStore<long double>(store_addr, n);
+}
+
+template <typename FT>
+static bool isValidShadowType(const unsigned char *ShadowType) {
+  return __builtin_memcmp(ShadowType, FTInfo<FT>::kTypePattern, sizeof(FT)) ==
+         0;
+}
+
+template <int kSize, typename T> static bool isZero(const T *Ptr) {
+  constexpr const char kZeros[kSize] = {}; // Zero initialized.
+  return __builtin_memcmp(Ptr, kZeros, kSize) == 0;
+}
+
+template <typename FT>
+static bool isUnknownShadowType(const unsigned char *ShadowType) {
+  return isZero<sizeof(FTInfo<FT>::kTypePattern)>(ShadowType);
+}
+
+// The three following functions check that the address stores a complete
+// shadow value of the given type and return a pointer for loading.
+// They return nullptr if the type of the value is unknown or incomplete.
+template <typename FT>
+static const char *getShadowPtrForLoad(const char *LoadAddr, uptr N) {
+  const unsigned char *const ShadowType = getShadowTypeAddrFor(LoadAddr);
+  for (uptr I = 0; I < N; ++I) {
+    if (!isValidShadowType<FT>(ShadowType + I * sizeof(FT))) {
+      // If loadtracking stats are enabled, log loads with invalid types
+      // (tampered with through type punning).
+      if (flags().enable_loadtracking_stats) {
+        if (isUnknownShadowType<FT>(ShadowType + I * sizeof(FT))) {
+          // Warn only if the value is non-zero. Zero is special because
+          // applications typically initialize large buffers to zero in an
+          // untyped way.
+          if (!isZero<sizeof(FT)>(LoadAddr)) {
+            GET_CALLER_PC_BP;
+            nsan_stats->addUnknownLoadTrackingEvent(pc, bp);
+          }
+        } else {
+          GET_CALLER_PC_BP;
+          nsan_stats->addInvalidLoadTrackingEvent(pc, bp);
+        }
+      }
+      return nullptr;
+    }
+  }
+  return getShadowAddrFor(LoadAddr);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE const char *
+__nsan_get_shadow_ptr_for_float_load(const char *load_addr, uptr n) {
+  return getShadowPtrForLoad<float>(load_addr, n);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE const char *
+__nsan_get_shadow_ptr_for_double_load(const char *load_addr, uptr n) {
+  return getShadowPtrForLoad<double>(load_addr, n);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE const char *
+__nsan_get_shadow_ptr_for_longdouble_load(const char *load_addr, uptr n) {
+  return getShadowPtrForLoad<long double>(load_addr, n);
+}
+
+// Returns the raw shadow pointer. The returned pointer should be considered
+// opaque.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE char *
+__nsan_internal_get_raw_shadow_ptr(const char *addr) {
+  return getShadowAddrFor(const_cast<char *>(addr));
+}
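Instrumented code pairs every application load/store with a shadow access through the entry points above. A hand-written sketch of what the pass emits for a single float (illustrative only, not actual compiler output; the `uptr` alias is an assumption matching `__sanitizer::uptr` on 64-bit Linux):

    using uptr = __SIZE_TYPE__;
    extern "C" {
    char *__nsan_get_shadow_ptr_for_float_store(char *store_addr, uptr n);
    const char *__nsan_get_shadow_ptr_for_float_load(const char *load_addr,
                                                     uptr n);
    }

    void storeThenLoad(float *P, float V, double VShadow) {
      *P = V; // Application store.
      char *S = __nsan_get_shadow_ptr_for_float_store((char *)P, 1);
      __builtin_memcpy(S, &VShadow, sizeof(VShadow)); // Shadow store.

      float Reloaded = *P;              // Application load.
      double ReloadedShadow = Reloaded; // Fallback: resume from the app value.
      if (const char *L =
              __nsan_get_shadow_ptr_for_float_load((const char *)P, 1))
        __builtin_memcpy(&ReloadedShadow, L, sizeof(ReloadedShadow));
      (void)ReloadedShadow;
    }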
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE char * +__nsan_internal_get_raw_shadow_type_ptr(const char *addr) { + return reinterpret_cast( + getShadowTypeAddrFor(const_cast(addr))); +} + +static ValueType getValueType(unsigned char c) { + return static_cast(c & 0x3); +} + +static int getValuePos(unsigned char c) { return c >> kValueSizeSizeBits; } + +// Checks the consistency of the value types at the given type pointer. +// If the value is inconsistent, returns ValueType::kUnknown. Else, return the +// consistent type. +template +static bool checkValueConsistency(const unsigned char *ShadowType) { + const int Pos = getValuePos(*ShadowType); + // Check that all bytes from the start of the value are ordered. + for (uptr I = 0; I < sizeof(FT); ++I) { + const unsigned char T = *(ShadowType - Pos + I); + if (!(getValueType(T) == FTInfo::kValueType && getValuePos(T) == I)) { + return false; + } + } + return true; +} + +// The instrumentation automatically appends `shadow_value_type_ids`, see +// maybeAddSuffixForNsanInterface. +extern "C" SANITIZER_INTERFACE_ATTRIBUTE void +__nsan_dump_shadow_mem(const char *addr, size_t size_bytes, + size_t bytes_per_line, size_t shadow_value_type_ids) { + const unsigned char *const ShadowType = getShadowTypeAddrFor(addr); + const char *const Shadow = getShadowAddrFor(addr); + + constexpr const int kMaxNumDecodedValues = 16; + __float128 DecodedValues[kMaxNumDecodedValues]; + int NumDecodedValues = 0; + if (bytes_per_line > 4 * kMaxNumDecodedValues) { + bytes_per_line = 4 * kMaxNumDecodedValues; + } + + // We keep track of the current type and position as we go. + ValueType LastValueTy = kUnknownValueType; + int LastPos = -1; + size_t Offset = 0; + for (size_t R = 0; R < (size_bytes + bytes_per_line - 1) / bytes_per_line; ++R) { + printf("%p: ", (void*)(addr + R * bytes_per_line)); + for (size_t C = 0; C < bytes_per_line && Offset < size_bytes; ++C) { + const ValueType ValueTy = getValueType(ShadowType[Offset]); + const int pos = getValuePos(ShadowType[Offset]); + if (ValueTy == LastValueTy && pos == LastPos + 1) { + ++LastPos; + } else { + LastValueTy = ValueTy; + LastPos = pos == 0 ? 
+        LastPos = pos == 0 ? 0 : -1;
+      }
+
+      switch (ValueTy) {
+      case kUnknownValueType:
+        printf("__ ");
+        break;
+      case kFloatValueType:
+        printf("f%x ", pos);
+        if (LastPos == sizeof(float) - 1) {
+          DecodedValues[NumDecodedValues] =
+              readShadow(Shadow + kShadowScale * (Offset + 1 - sizeof(float)),
+                         static_cast<char>(shadow_value_type_ids & 0xff));
+          ++NumDecodedValues;
+        }
+        break;
+      case kDoubleValueType:
+        printf("d%x ", pos);
+        if (LastPos == sizeof(double) - 1) {
+          DecodedValues[NumDecodedValues] = readShadow(
+              Shadow + kShadowScale * (Offset + 1 - sizeof(double)),
+              static_cast<char>((shadow_value_type_ids >> 8) & 0xff));
+          ++NumDecodedValues;
+        }
+        break;
+      case kFp80ValueType:
+        printf("l%x ", pos);
+        if (LastPos == sizeof(long double) - 1) {
+          DecodedValues[NumDecodedValues] = readShadow(
+              Shadow + kShadowScale * (Offset + 1 - sizeof(long double)),
+              static_cast<char>((shadow_value_type_ids >> 16) & 0xff));
+          ++NumDecodedValues;
+        }
+        break;
+      }
+      ++Offset;
+    }
+    for (int I = 0; I < NumDecodedValues; ++I) {
+      printf("  (%s)", FTPrinter<__float128>::dec(DecodedValues[I]).Buffer);
+    }
+    NumDecodedValues = 0;
+    printf("\n");
+  }
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+ALIGNED(16)
+THREADLOCAL
+uptr __nsan_shadow_ret_tag = 0;
+
+SANITIZER_INTERFACE_ATTRIBUTE
+ALIGNED(16)
+THREADLOCAL
+char __nsan_shadow_ret_ptr[kMaxVectorWidth * sizeof(__float128)];
+
+SANITIZER_INTERFACE_ATTRIBUTE
+ALIGNED(16)
+THREADLOCAL
+uptr __nsan_shadow_args_tag = 0;
+
+// Maximum number of args. This should be enough for anyone (tm). An
+// alternate scheme is to have the generated code create an alloca and make
+// __nsan_shadow_args_ptr point to the alloca.
+constexpr const int kMaxNumArgs = 128;
+SANITIZER_INTERFACE_ATTRIBUTE
+ALIGNED(16)
+THREADLOCAL
+char __nsan_shadow_args_ptr[kMaxVectorWidth * kMaxNumArgs *
+                            sizeof(__float128)];
+
+enum ContinuationType { // Keep in sync with instrumentation pass.
+  kContinueWithShadow = 0,
+  kResumeFromValue = 1,
+};
+
+// Checks the consistency between the application value and its shadow.
+// Returns kResumeFromValue when the instrumented code should resume
+// computations from the original value rather than the shadow value. This
+// prevents one error from propagating to all subsequent operations. This
+// behaviour is tunable with flags.
+template <typename FT, typename ShadowFT>
+int32_t checkFT(const FT Value, ShadowFT Shadow, CheckTypeT CheckType,
+                uptr CheckArg) {
+  // We do all comparisons in the InternalFT domain, which is the largest FT
+  // type.
+  using InternalFT = LargestFT<FT, ShadowFT>;
+  const InternalFT CheckValue = Value;
+  const InternalFT CheckShadow = Shadow;
+
+  // See this article for an interesting discussion of how to compare floats:
+  // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
+  static constexpr const FT Eps = FTInfo<FT>::kEpsilon;
+
+  const InternalFT AbsErr = ftAbs(CheckValue - CheckShadow);
+
+  if (flags().enable_check_stats) {
+    GET_CALLER_PC_BP;
+    // We are re-computing `Largest` here because this is a cold branch, and
+    // we want to avoid having to move the computation of `Largest` before
+    // the absolute value check when this branch is not taken.
+    const InternalFT Largest = max(ftAbs(CheckValue), ftAbs(CheckShadow));
+    nsan_stats->addCheck(CheckType, pc, bp, AbsErr / Largest);
+  }
+  // Note: writing the comparison that way ensures that when `AbsErr` is NaN
+  // (value and shadow are inf or -inf), we pass the test.
+  if (!(AbsErr >= flags().cached_absolute_error_threshold))
+    return kContinueWithShadow;
+
+  const InternalFT Largest = max(ftAbs(CheckValue), ftAbs(CheckShadow));
+  if (AbsErr * (1ull << flags().log2_max_relative_error) <= Largest)
+    return kContinueWithShadow; // No problem here.
+
+  if (!flags().disable_warnings) {
+    GET_CALLER_PC_BP;
+    BufferedStackTrace stack;
+    stack.Unwind(pc, bp, nullptr, false);
+    if (GetSuppressionForStack(&stack, kSuppressionConsistency)) {
+      // FIXME: optionally print.
+      return flags().resume_after_suppression ? kResumeFromValue
+                                              : kContinueWithShadow;
+    }
+
+    Decorator D;
+    Printf("%s", D.Warning());
+    // Printf does not support float formatting.
+    char RelErrBuf[64] = "inf";
+    if (Largest > Eps) {
+      snprintf(RelErrBuf, sizeof(RelErrBuf) - 1, "%.20Lf%% (2^%.0Lf epsilons)",
+               static_cast<long double>(100.0 * AbsErr / Largest),
+               log2l(static_cast<long double>(AbsErr / Largest / Eps)));
+    }
+    char UlpErrBuf[128] = "";
+    const double ShadowUlpDiff = getULPDiff(CheckValue, CheckShadow);
+    if (ShadowUlpDiff != kMaxULPDiff) {
+      // This is the ULP diff in the internal domain. The user actually cares
+      // about that in the original domain.
+      const double UlpDiff =
+          ShadowUlpDiff / (u64{1} << (FTInfo<InternalFT>::kMantissaBits -
+                                      FTInfo<FT>::kMantissaBits));
+      snprintf(UlpErrBuf, sizeof(UlpErrBuf) - 1,
+               "(%.0f ULPs == %.1f digits == %.1f bits)", UlpDiff,
+               log10(UlpDiff), log2(UlpDiff));
+    }
+    Printf("WARNING: NumericalStabilitySanitizer: inconsistent shadow results");
+    switch (CheckType) {
+    case CheckTypeT::kUnknown:
+    case CheckTypeT::kFcmp:
+    case CheckTypeT::kMaxCheckType:
+      break;
+    case CheckTypeT::kRet:
+      Printf(" while checking return value");
+      break;
+    case CheckTypeT::kArg:
+      Printf(" while checking call argument #%d", static_cast<int>(CheckArg));
+      break;
+    case CheckTypeT::kLoad:
+      Printf(" while checking load from address 0x%zx. This is due to "
+             "incorrect shadow memory tracking, typically due to "
+             "uninstrumented code writing to memory.",
+             CheckArg);
+      break;
+    case CheckTypeT::kStore:
+      Printf(" while checking store to address 0x%zx", CheckArg);
+      break;
+    case CheckTypeT::kInsert:
+      Printf(" while checking vector insert");
+      break;
+    case CheckTypeT::kUser:
+      Printf(" in user-initiated check");
+      break;
+    }
+    using ValuePrinter = FTPrinter<FT>;
+    using ShadowPrinter = FTPrinter<ShadowFT>;
+    Printf("\n"
+           "%-12s precision  (native): dec: %s  hex: %s\n"
+           "%-12s precision  (shadow): dec: %s  hex: %s\n"
+           "shadow truncated to %-12s: dec: %s  hex: %s\n"
+           "Relative error: %s\n"
+           "Absolute error: %s\n"
+           "%s\n",
+           FTInfo<FT>::kCppTypeName, ValuePrinter::dec(Value).Buffer,
+           ValuePrinter::hex(Value).Buffer,
+           FTInfo<ShadowFT>::kCppTypeName, ShadowPrinter::dec(Shadow).Buffer,
+           ShadowPrinter::hex(Shadow).Buffer,
+           FTInfo<FT>::kCppTypeName, ValuePrinter::dec(Shadow).Buffer,
+           ValuePrinter::hex(Shadow).Buffer, RelErrBuf,
+           ValuePrinter::hex(AbsErr).Buffer, UlpErrBuf, D.End());
+    stack.Print();
+  }
+
+  if (flags().enable_warning_stats) {
+    GET_CALLER_PC_BP;
+    nsan_stats->addWarning(CheckType, pc, bp, AbsErr / Largest);
+  }
+
+  if (flags().halt_on_error) {
+    Printf("Exiting\n");
+    Die();
+  }
+  return flags().resume_after_warning ? kResumeFromValue : kContinueWithShadow;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE int32_t
+__nsan_internal_check_float_d(float value, double shadow, int32_t check_type,
+                              uptr check_arg) {
+  return checkFT(value, shadow, static_cast<CheckTypeT>(check_type),
+                 check_arg);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE int32_t
+__nsan_internal_check_double_l(double value, long double shadow,
+                               int32_t check_type, uptr check_arg) {
+  return checkFT(value, shadow, static_cast<CheckTypeT>(check_type),
+                 check_arg);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE int32_t
+__nsan_internal_check_double_q(double value, __float128 shadow,
+                               int32_t check_type, uptr check_arg) {
+  return checkFT(value, shadow, static_cast<CheckTypeT>(check_type),
+                 check_arg);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE int32_t
+__nsan_internal_check_longdouble_q(long double value, __float128 shadow,
+                                   int32_t check_type, uptr check_arg) {
+  return checkFT(value, shadow, static_cast<CheckTypeT>(check_type),
+                 check_arg);
+}
+
+static const char *getTruthValueName(bool v) { return v ? "true" : "false"; }
+
+// This uses the same values as CmpInst::Predicate.
+static const char *getPredicateName(int v) {
+  switch (v) {
+  case 0:
+    return "(false)";
+  case 1:
+    return "==";
+  case 2:
+    return ">";
+  case 3:
+    return ">=";
+  case 4:
+    return "<";
+  case 5:
+    return "<=";
+  case 6:
+    return "!=";
+  case 7:
+    return "(ordered)";
+  case 8:
+    return "(unordered)";
+  case 9:
+    return "==";
+  case 10:
+    return ">";
+  case 11:
+    return ">=";
+  case 12:
+    return "<";
+  case 13:
+    return "<=";
+  case 14:
+    return "!=";
+  case 15:
+    return "(true)";
+  }
+  return "??";
+}
+
+template <typename FT, typename ShadowFT>
+void fCmpFailFT(const FT Lhs, const FT Rhs, ShadowFT LhsShadow,
+                ShadowFT RhsShadow, int Predicate, bool Result,
+                bool ShadowResult) {
+  if (Result == ShadowResult) {
+    // When a vector comparison fails, we fail each element of the comparison
+    // to simplify instrumented code. Skip elements where the shadow
+    // comparison gave the same result as the original one.
+    return;
+  }
+
+  GET_CALLER_PC_BP;
+  BufferedStackTrace Stack;
+  Stack.Unwind(pc, bp, nullptr, false);
+
+  if (GetSuppressionForStack(&Stack, kSuppressionFcmp)) {
+    // FIXME: optionally print.
+    return;
+  }
+
+  if (flags().enable_warning_stats) {
+    nsan_stats->addWarning(CheckTypeT::kFcmp, pc, bp, 0.0);
+  }
+
+  if (flags().disable_warnings) {
+    return;
+  }
+
+  // FIXME: ideally we would print the shadow value as FP128. Right now
+  // because we truncate to long double we can sometimes see stuff like:
+  //   shadow == (false)
+  using ValuePrinter = FTPrinter<FT>;
+  using ShadowPrinter = FTPrinter<ShadowFT>;
+  Decorator D;
+  const char *const PredicateName = getPredicateName(Predicate);
+  Printf("%s", D.Warning());
+  Printf("WARNING: NumericalStabilitySanitizer: floating-point comparison "
+         "results depend on precision\n"
+         "%-12s precision dec (native): %s %s %s (%s)\n"
+         "%-12s precision dec (shadow): %s %s %s (%s)\n"
+         "%-12s precision hex (native): %s %s %s (%s)\n"
+         "%-12s precision hex (shadow): %s %s %s (%s)\n"
+         "%s",
+         // Native, decimal.
+         FTInfo<FT>::kCppTypeName, ValuePrinter::dec(Lhs).Buffer,
+         PredicateName, ValuePrinter::dec(Rhs).Buffer,
+         getTruthValueName(Result),
+         // Shadow, decimal.
+         FTInfo<ShadowFT>::kCppTypeName, ShadowPrinter::dec(LhsShadow).Buffer,
+         PredicateName, ShadowPrinter::dec(RhsShadow).Buffer,
+         getTruthValueName(ShadowResult),
+         // Native, hex.
+         FTInfo<FT>::kCppTypeName, ValuePrinter::hex(Lhs).Buffer,
+         PredicateName, ValuePrinter::hex(Rhs).Buffer,
+         getTruthValueName(Result),
+         // Shadow, hex.
+         FTInfo<ShadowFT>::kCppTypeName, ShadowPrinter::hex(LhsShadow).Buffer,
+         PredicateName, ShadowPrinter::hex(RhsShadow).Buffer,
+         getTruthValueName(ShadowResult), D.End());
+  Printf("%s", D.Default());
+  Stack.Print();
+  if (flags().halt_on_error) {
+    Printf("Exiting\n");
+    Die();
+  }
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_fcmp_fail_float_d(float lhs, float rhs, double lhs_shadow,
+                         double rhs_shadow, int predicate, bool result,
+                         bool shadow_result) {
+  fCmpFailFT(lhs, rhs, lhs_shadow, rhs_shadow, predicate, result,
+             shadow_result);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_fcmp_fail_double_q(double lhs, double rhs, __float128 lhs_shadow,
+                          __float128 rhs_shadow, int predicate, bool result,
+                          bool shadow_result) {
+  fCmpFailFT(lhs, rhs, lhs_shadow, rhs_shadow, predicate, result,
+             shadow_result);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_fcmp_fail_double_l(double lhs, double rhs, long double lhs_shadow,
+                          long double rhs_shadow, int predicate, bool result,
+                          bool shadow_result) {
+  fCmpFailFT(lhs, rhs, lhs_shadow, rhs_shadow, predicate, result,
+             shadow_result);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_fcmp_fail_longdouble_q(long double lhs, long double rhs,
+                              __float128 lhs_shadow, __float128 rhs_shadow,
+                              int predicate, bool result, bool shadow_result) {
+  fCmpFailFT(lhs, rhs, lhs_shadow, rhs_shadow, predicate, result,
+             shadow_result);
+}
+
+template <typename FT> void checkFTFromShadowStack(const FT Value) {
+  // Get the shadow 2FT value from the shadow stack. Note that
+  // __nsan_check_{float,double,longdouble} is a function like any other, so
+  // the instrumentation will have placed the shadow value on the shadow
+  // stack.
+  using ShadowFT = typename FTInfo<FT>::shadow_type;
+  ShadowFT Shadow;
+  __builtin_memcpy(&Shadow, __nsan_shadow_args_ptr, sizeof(ShadowFT));
+  checkFT(Value, Shadow, CheckTypeT::kUser, 0);
+}
+
+// FIXME: Add suffixes and let the instrumentation pass automatically add
+// suffixes.
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_check_float(float Value) {
+  assert(__nsan_shadow_args_tag == (uptr)&__nsan_check_float &&
+         "__nsan_check_float called from non-instrumented function");
+  checkFTFromShadowStack(Value);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_check_double(double Value) {
+  assert(__nsan_shadow_args_tag == (uptr)&__nsan_check_double &&
+         "__nsan_check_double called from non-instrumented function");
+  checkFTFromShadowStack(Value);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_check_longdouble(long double Value) {
+  assert(__nsan_shadow_args_tag == (uptr)&__nsan_check_longdouble &&
+         "__nsan_check_longdouble called from non-instrumented function");
+  checkFTFromShadowStack(Value);
+}
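The asserts above encode the shadow-argument TLS protocol: before the call, the instrumented caller tags __nsan_shadow_args_tag with the callee's address and spills the shadow arguments to __nsan_shadow_args_ptr. A sketch of the caller side that the pass normally emits (illustrative; the declarations are abbreviated versions of the thread-local definitions earlier in this file, and the `uptr` alias is an assumption):

    using uptr = __SIZE_TYPE__;
    extern "C" {
    extern __thread uptr __nsan_shadow_args_tag;
    extern __thread char __nsan_shadow_args_ptr[];
    void __nsan_check_float(float value);
    }

    void checkWithShadow(float Value, double Shadow) {
      __builtin_memcpy(__nsan_shadow_args_ptr, &Shadow, sizeof(Shadow));
      __nsan_shadow_args_tag = (uptr)&__nsan_check_float; // Satisfies assert.
      __nsan_check_float(Value); // Callee reads the double shadow from TLS.
    }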
+template <typename FT> static void dumpFTFromShadowStack(const FT Value) {
+  // Get the shadow 2FT value from the shadow stack. Note that
+  // __nsan_dump_{float,double,longdouble} is a function like any other, so
+  // the instrumentation will have placed the shadow value on the shadow
+  // stack.
+  using ShadowFT = typename FTInfo<FT>::shadow_type;
+  ShadowFT Shadow;
+  __builtin_memcpy(&Shadow, __nsan_shadow_args_ptr, sizeof(ShadowFT));
+  using ValuePrinter = FTPrinter<FT>;
+  using ShadowPrinter = FTPrinter<typename FTInfo<FT>::shadow_type>;
+  printf("value  dec:%s  hex:%s\n"
+         "shadow dec:%s  hex:%s\n",
+         ValuePrinter::dec(Value).Buffer, ValuePrinter::hex(Value).Buffer,
+         ShadowPrinter::dec(Shadow).Buffer, ShadowPrinter::hex(Shadow).Buffer);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_dump_float(float Value) {
+  assert(__nsan_shadow_args_tag == (uptr)&__nsan_dump_float &&
+         "__nsan_dump_float called from non-instrumented function");
+  dumpFTFromShadowStack(Value);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_dump_double(double Value) {
+  assert(__nsan_shadow_args_tag == (uptr)&__nsan_dump_double &&
+         "__nsan_dump_double called from non-instrumented function");
+  dumpFTFromShadowStack(Value);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void
+__nsan_dump_longdouble(long double Value) {
+  assert(__nsan_shadow_args_tag == (uptr)&__nsan_dump_longdouble &&
+         "__nsan_dump_longdouble called from non-instrumented function");
+  dumpFTFromShadowStack(Value);
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_dump_shadow_ret() {
+  printf("ret tag: %lx\n", __nsan_shadow_ret_tag);
+  double V;
+  __builtin_memcpy(&V, __nsan_shadow_ret_ptr, sizeof(double));
+  printf("double value: %f\n", V);
+  // FIXME: float128 value.
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_dump_shadow_args() {
+  printf("args tag: %lx\n", __nsan_shadow_args_tag);
+}
+
+namespace __nsan {
+bool NsanInitialized = false;
+bool NsanInitIsRunning;
+} // end namespace __nsan
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __nsan_init() {
+  CHECK(!NsanInitIsRunning);
+  if (NsanInitialized)
+    return;
+  NsanInitIsRunning = true;
+
+  InitializeFlags();
+  InitializeSuppressions();
+  InitializePlatformEarly();
+
+  if (!MmapFixedNoReserve(TypesAddr(), UnusedAddr() - TypesAddr()))
+    Die();
+
+  initializeInterceptors();
+
+  initializeStats();
+  if (flags().print_stats_on_exit)
+    Atexit(nsanAtexit);
+
+  NsanInitIsRunning = false;
+  NsanInitialized = true;
+}
+
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+__attribute__((section(".preinit_array"),
+               used)) static void (*nsan_init_ptr)() = __nsan_init;
+#endif
diff --git a/compiler-rt/lib/nsan/nsan.syms.extra b/compiler-rt/lib/nsan/nsan.syms.extra
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan.syms.extra
@@ -0,0 +1,2 @@
+nsan_*
+__nsan_*
\ No newline at end of file
diff --git a/compiler-rt/lib/nsan/nsan_flags.h b/compiler-rt/lib/nsan/nsan_flags.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan_flags.h
@@ -0,0 +1,35 @@
+//===-- nsan_flags.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//===----------------------------------------------------------------------===// + +#ifndef NSAN_FLAGS_H +#define NSAN_FLAGS_H + +namespace __nsan { + +struct Flags { +#define NSAN_FLAG(Type, Name, DefaultValue, Description) Type Name; +#include "nsan_flags.inc" +#undef NSAN_FLAG + + double cached_absolute_error_threshold = 0.0; + + void SetDefaults(); + void PopulateCache(); +}; + +extern Flags flags_data; +inline Flags &flags() { return flags_data; } + +void InitializeFlags(); + +} // namespace __nsan + +#endif diff --git a/compiler-rt/lib/nsan/nsan_flags.cc b/compiler-rt/lib/nsan/nsan_flags.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/nsan/nsan_flags.cc @@ -0,0 +1,78 @@ +//===-- nsan_flags.cc -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of NumericalStabilitySanitizer. +// +//===----------------------------------------------------------------------===// + +#include "nsan_flags.h" + +#include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_flags.h" + +namespace __nsan { + +SANITIZER_INTERFACE_WEAK_DEF(const char *, __nsan_default_options, void) { + return ""; +} + +using namespace __sanitizer; + +Flags flags_data; + +void Flags::SetDefaults() { +#define NSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue; +#include "nsan_flags.inc" +#undef NSAN_FLAG +} + +void Flags::PopulateCache() { + cached_absolute_error_threshold = + 1.0 / (1ull << log2_absolute_error_threshold); +} + +static void RegisterNSanFlags(FlagParser *parser, Flags *f) { +#define NSAN_FLAG(Type, Name, DefaultValue, Description) \ + RegisterFlag(parser, #Name, Description, &f->Name); +#include "nsan_flags.inc" +#undef NSAN_FLAG +} + +static const char *MaybeCallNsanDefaultOptions() { + return (&__nsan_default_options) ? __nsan_default_options() : ""; +} + +void InitializeFlags() { + SetCommonFlagsDefaults(); + { + CommonFlags cf; + cf.CopyFrom(*common_flags()); + cf.external_symbolizer_path = GetEnv("NSAN_SYMBOLIZER_PATH"); + OverrideCommonFlags(cf); + } + + flags().SetDefaults(); + + FlagParser parser; + RegisterCommonFlags(&parser); + RegisterNSanFlags(&parser, &flags()); + + const char *nsan_default_options = MaybeCallNsanDefaultOptions(); + parser.ParseString(nsan_default_options); + + parser.ParseString(GetEnv("NSAN_OPTIONS")); + InitializeCommonFlags(); + if (Verbosity()) + ReportUnrecognizedFlags(); + if (common_flags()->help) + parser.PrintFlagDescriptions(); + + flags().PopulateCache(); +} + +} // namespace __nsan diff --git a/compiler-rt/lib/nsan/nsan_flags.inc b/compiler-rt/lib/nsan/nsan_flags.inc new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/nsan/nsan_flags.inc @@ -0,0 +1,49 @@ +//===-- nsan_flags.inc ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// NSan runtime flags. 
+//
+//===----------------------------------------------------------------------===//
+#ifndef NSAN_FLAG
+# error "Define NSAN_FLAG prior to including this file!"
+#endif
+
+// NSAN_FLAG(Type, Name, DefaultValue, Description)
+// See COMMON_FLAG in sanitizer_flags.inc for more details.
+
+NSAN_FLAG(bool, halt_on_error, true, "If true, halt after the first error.")
+NSAN_FLAG(bool, resume_after_warning, true,
+          "If true, we resume the computation from the original "
+          "application floating-point value after a warning. If false, "
+          "computations continue with the shadow value.")
+NSAN_FLAG(const char *, suppressions, "", "Suppressions file name.")
+NSAN_FLAG(bool, resume_after_suppression, true,
+          "If true, a suppression will also resume the computation from the FT"
+          " domain. If false, output is suppressed but the shadow value is"
+          " retained.")
+// FIXME: should this be specified in units of epsilon instead?
+NSAN_FLAG(int, log2_max_relative_error, 19,
+          "Log2 maximum admissible relative error, e.g. 19 means max relative "
+          "error of 1/2^19 ~= 0.000002.")
+NSAN_FLAG(int, log2_absolute_error_threshold, 32,
+          "Log2 maximum admissible absolute error. Any numbers closer than "
+          "1/2^n are considered to be the same.")
+NSAN_FLAG(bool, disable_warnings, false,
+          "If true, disable warning printing. This is useful to only compute "
+          "stats.")
+NSAN_FLAG(bool, enable_check_stats, false,
+          "If true, compute check stats, i.e. for each line, the number of "
+          "times a check was performed on this line.")
+NSAN_FLAG(bool, enable_warning_stats, false,
+          "If true, compute warning stats, i.e. for each line, the number of "
+          "times a warning was emitted for this line.")
+NSAN_FLAG(bool, enable_loadtracking_stats, false,
+          "If true, compute load tracking stats, i.e. for each load from "
+          "memory, the number of times nsan resumed from the original value "
+          "due to invalid or unknown types.")
+NSAN_FLAG(bool, print_stats_on_exit, false, "If true, print stats on exit.")
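How the two error thresholds above combine at check time, in a simplified model of checkFT from nsan.cc (illustrative sketch that ignores the NaN subtleties; the same knobs can also be set at run time through the NSAN_OPTIONS environment variable parsed in nsan_flags.cc, e.g. NSAN_OPTIONS=log2_max_relative_error=10:halt_on_error=0):

    #include <cmath>

    // Warn only when the error is large both in absolute and relative terms.
    bool wouldWarn(double value, double shadow, int log2Abs = 32,
                   int log2Rel = 19) {
      const double absErr = std::fabs(value - shadow);
      if (absErr < 1.0 / (1ull << log2Abs))
        return false; // Closer than 1/2^32: considered equal.
      const double largest = std::fmax(std::fabs(value), std::fabs(shadow));
      return absErr * (1ull << log2Rel) > largest; // Relative error > 1/2^19.
    }
    // wouldWarn(1.0, 1.0 + 1e-9) -> false; wouldWarn(1.0, 1.01) -> true.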
+//
+//===----------------------------------------------------------------------===//
+
+#include "interception/interception.h"
+#include "nsan/nsan.h"
+#include "sanitizer_common/sanitizer_common.h"
+
+#include <wchar.h>
+
+#if SANITIZER_LINUX
+extern "C" int mallopt(int param, int value);
+#endif
+
+using namespace __sanitizer;
+using __nsan::NsanInitialized;
+using __nsan::NsanInitIsRunning;
+
+static constexpr uptr kEarlyAllocBufSize = 16384;
+static uptr AllocatedBytes;
+static char EarlyAllocBuf[kEarlyAllocBufSize];
+
+static bool isInEarlyAllocBuf(const void *Ptr) {
+  return ((uptr)Ptr >= (uptr)EarlyAllocBuf &&
+          ((uptr)Ptr - (uptr)EarlyAllocBuf) < sizeof(EarlyAllocBuf));
+}
+
+static char *toCharPtr(wchar_t *ptr) { return reinterpret_cast<char *>(ptr); }
+static const char *toCharPtr(const wchar_t *ptr) {
+  return reinterpret_cast<const char *>(ptr);
+}
+
+template <typename T>
+T min(T a, T b) {
+  return a < b ? a : b;
+}
+
+// Handle allocation requests early (before all interceptors are setup). dlsym,
+// for example, calls calloc.
+static void *handleEarlyAlloc(uptr Size) {
+  void *const Mem = (void *)&EarlyAllocBuf[AllocatedBytes];
+  AllocatedBytes += Size;
+  CHECK_LT(AllocatedBytes, kEarlyAllocBufSize);
+  return Mem;
+}
+
+INTERCEPTOR(void *, memset, void *Dst, int V, uptr Size) {
+  // NOTE: This guard is needed because nsan's initialization code might call
+  // memset.
+  if (!NsanInitialized && REAL(memset) == nullptr)
+    return internal_memset(Dst, V, Size);
+
+  void *const Res = REAL(memset)(Dst, V, Size);
+  __nsan_set_value_unknown(static_cast<char *>(Dst), Size);
+  return Res;
+}
+
+INTERCEPTOR(wchar_t *, wmemset, wchar_t *Dst, wchar_t V, uptr Size) {
+  wchar_t *const Res = REAL(wmemset)(Dst, V, Size);
+  __nsan_set_value_unknown(toCharPtr(Dst), sizeof(wchar_t) * Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, memmove, void *Dst, const void *Src, uptr Size) {
+  // NOTE: This guard is needed because nsan's initialization code might call
+  // memmove.
+  if (!NsanInitialized && REAL(memmove) == nullptr)
+    return internal_memmove(Dst, Src, Size);
+
+  void *const Res = REAL(memmove)(Dst, Src, Size);
+  __nsan_copy_values(static_cast<char *>(Dst), static_cast<const char *>(Src),
+                     Size);
+  return Res;
+}
+
+INTERCEPTOR(wchar_t *, wmemmove, wchar_t *Dst, const wchar_t *Src, uptr Size) {
+  wchar_t *const Res = REAL(wmemmove)(Dst, Src, Size);
+  __nsan_copy_values(toCharPtr(Dst), toCharPtr(Src), sizeof(wchar_t) * Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, memcpy, void *Dst, const void *Src, uptr Size) {
+  // NOTE: This guard is needed because nsan's initialization code might call
+  // memcpy.
+  if (!NsanInitialized && REAL(memcpy) == nullptr) {
+    // memmove is used here because on some platforms this will also
+    // intercept the memmove implementation.
+    return internal_memmove(Dst, Src, Size);
+  }
+
+  void *const Res = REAL(memcpy)(Dst, Src, Size);
+  __nsan_copy_values(static_cast<char *>(Dst), static_cast<const char *>(Src),
+                     Size);
+  return Res;
+}
+
+INTERCEPTOR(wchar_t *, wmemcpy, wchar_t *Dst, const wchar_t *Src, uptr Size) {
+  wchar_t *const Res = REAL(wmemcpy)(Dst, Src, Size);
+  __nsan_copy_values(toCharPtr(Dst), toCharPtr(Src), sizeof(wchar_t) * Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, malloc, uptr Size) {
+  // NOTE: This guard is needed because nsan's initialization code might call
+  // malloc.
+  if (NsanInitIsRunning && REAL(malloc) == nullptr)
+    return handleEarlyAlloc(Size);
+
+  void *const Res = REAL(malloc)(Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, realloc, void *Ptr, uptr Size) {
+  void *const Res = REAL(realloc)(Ptr, Size);
+  // FIXME: We might want to copy the types from the original allocation
+  // (although that would require that we know its size).
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, calloc, uptr Nmemb, uptr Size) {
+  // NOTE: This guard is needed because nsan's initialization code might call
+  // calloc.
+  if (NsanInitIsRunning && REAL(calloc) == nullptr) {
+    // Note: EarlyAllocBuf is initialized with zeros.
+    return handleEarlyAlloc(Nmemb * Size);
+  }
+
+  void *const Res = REAL(calloc)(Nmemb, Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Nmemb * Size);
+  return Res;
+}
+
+INTERCEPTOR(void, free, void *P) {
+  // There are only a few early allocation requests, so we simply skip the free.
+  if (isInEarlyAllocBuf(P))
+    return;
+  REAL(free)(P);
+}
+
+INTERCEPTOR(void *, valloc, uptr Size) {
+  void *const Res = REAL(valloc)(Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, memalign, uptr Alignment, uptr Size) {
+  void *const Res = REAL(memalign)(Alignment, Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, __libc_memalign, uptr Alignment, uptr Size) {
+  void *const Res = REAL(__libc_memalign)(Alignment, Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, pvalloc, uptr Size) {
+  void *const Res = REAL(pvalloc)(Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(void *, aligned_alloc, uptr Alignment, uptr Size) {
+  void *const Res = REAL(aligned_alloc)(Alignment, Size);
+  if (Res)
+    __nsan_set_value_unknown(static_cast<char *>(Res), Size);
+  return Res;
+}
+
+INTERCEPTOR(int, posix_memalign, void **Memptr, uptr Alignment, uptr Size) {
+  int Res = REAL(posix_memalign)(Memptr, Alignment, Size);
+  if (Res == 0 && *Memptr)
+    __nsan_set_value_unknown(static_cast<char *>(*Memptr), Size);
+  return Res;
+}
+
+INTERCEPTOR(char *, strfry, char *S) {
+  const auto Len = internal_strlen(S);
+  char *const Res = REAL(strfry)(S);
+  if (Res)
+    __nsan_set_value_unknown(S, Len);
+  return Res;
+}
+
+INTERCEPTOR(char *, strsep, char **Stringp, const char *Delim) {
+  char *const OrigStringp = REAL(strsep)(Stringp, Delim);
+  // *Stringp is null when no delimiter was found.
+  if (Stringp != nullptr && *Stringp != nullptr) {
+    // The previous character has been overwritten with a '\0' char.
+    __nsan_set_value_unknown(*Stringp - 1, 1);
+  }
+  return OrigStringp;
+}
+
+INTERCEPTOR(char *, strtok, char *Str, const char *Delim) {
+  // This is overly conservative, but the probability that modern code is using
+  // strtok on double data is essentially zero anyway.
+  if (Str)
+    __nsan_set_value_unknown(Str, internal_strlen(Str));
+  return REAL(strtok)(Str, Delim);
+}
+
+static void nsanCopyZeroTerminated(const char *Dst, const char *Src, uptr N) {
+  __nsan_copy_values(Dst, Src, N);      // Data.
+  __nsan_set_value_unknown(Dst + N, 1); // Terminator.
+} + +static void nsanWCopyZeroTerminated(const wchar_t *Dst, const wchar_t *Src, + uptr N) { + __nsan_copy_values(toCharPtr(Dst), toCharPtr(Src), sizeof(wchar_t) * N); + __nsan_set_value_unknown(toCharPtr(Dst + N), sizeof(wchar_t)); +} + +INTERCEPTOR(char *, strdup, const char *S) { + char *const Res = REAL(strdup)(S); + if (Res) { + nsanCopyZeroTerminated(Res, S, internal_strlen(S)); + } + return Res; +} + +INTERCEPTOR(wchar_t *, wcsdup, const wchar_t *S) { + wchar_t *const Res = REAL(wcsdup)(S); + if (Res) { + nsanWCopyZeroTerminated(Res, S, wcslen(S)); + } + return Res; +} + +INTERCEPTOR(char *, strndup, const char *S, uptr Size) { + char *const Res = REAL(strndup)(S, Size); + if (Res) { + nsanCopyZeroTerminated(Res, S, min(internal_strlen(S), Size)); + } + return Res; +} + +INTERCEPTOR(char *, strcpy, char *Dst, const char *Src) { + char *const Res = REAL(strcpy)(Dst, Src); + nsanCopyZeroTerminated(Dst, Src, internal_strlen(Src)); + return Res; +} + +INTERCEPTOR(wchar_t *, wcscpy, wchar_t *Dst, const wchar_t *Src) { + wchar_t *const Res = REAL(wcscpy)(Dst, Src); + nsanWCopyZeroTerminated(Dst, Src, wcslen(Src)); + return Res; +} + +INTERCEPTOR(char *, strncpy, char *Dst, const char *Src, uptr Size) { + char *const Res = REAL(strncpy)(Dst, Src, Size); + nsanCopyZeroTerminated(Dst, Src, min(Size, internal_strlen(Src))); + return Res; +} + +INTERCEPTOR(char *, strcat, char *Dst, const char *Src) { + const auto DstLenBeforeCat = internal_strlen(Dst); + char *const Res = REAL(strcat)(Dst, Src); + nsanCopyZeroTerminated(Dst + DstLenBeforeCat, Src, internal_strlen(Src)); + return Res; +} + +INTERCEPTOR(wchar_t *, wcscat, wchar_t *Dst, const wchar_t *Src) { + const auto DstLenBeforeCat = wcslen(Dst); + wchar_t *const Res = REAL(wcscat)(Dst, Src); + nsanWCopyZeroTerminated(Dst + DstLenBeforeCat, Src, wcslen(Src)); + return Res; +} + +INTERCEPTOR(char *, strncat, char *Dst, const char *Src, uptr Size) { + const auto DstLen = internal_strlen(Dst); + char *const Res = REAL(strncat)(Dst, Src, Size); + nsanCopyZeroTerminated(Dst + DstLen, Src, + min(Size, internal_strlen(Src))); + return Res; +} + +INTERCEPTOR(char *, stpcpy, char *Dst, const char *Src) { + char *const Res = REAL(stpcpy)(Dst, Src); + nsanCopyZeroTerminated(Dst, Src, internal_strlen(Src)); + return Res; +} + +INTERCEPTOR(wchar_t *, wcpcpy, wchar_t *Dst, const wchar_t *Src) { + wchar_t *const Res = REAL(wcpcpy)(Dst, Src); + nsanWCopyZeroTerminated(Dst, Src, wcslen(Src)); + return Res; +} + +INTERCEPTOR(uptr, strxfrm, char *Dst, const char *Src, uptr Size) { + // This is overly conservative, but this function should very rarely be used. + __nsan_set_value_unknown(Dst, internal_strlen(Dst)); + const uptr Res = REAL(strxfrm)(Dst, Src, Size); + return Res; +} + +namespace __nsan { +void initializeInterceptors() { + static bool Initialized = false; + CHECK(!Initialized); + + // Instruct libc malloc to consume less memory. 
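+  // In glibc's <malloc.h>, parameter 1 is M_MXFAST and -3 is M_MMAP_THRESHOLD:
+  // setting M_MXFAST to 0 disables fastbins, and lowering M_MMAP_THRESHOLD to
+  // 32KiB makes glibc serve larger allocations through mmap, so that freed
+  // blocks are returned to the OS instead of being cached.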
+#if SANITIZER_LINUX
+  mallopt(1, 0);          // M_MXFAST
+  mallopt(-3, 32 * 1024); // M_MMAP_THRESHOLD
+#endif
+
+  INTERCEPT_FUNCTION(malloc);
+  INTERCEPT_FUNCTION(calloc);
+  INTERCEPT_FUNCTION(free);
+  INTERCEPT_FUNCTION(realloc);
+  INTERCEPT_FUNCTION(valloc);
+  INTERCEPT_FUNCTION(memalign);
+  INTERCEPT_FUNCTION(__libc_memalign);
+  INTERCEPT_FUNCTION(pvalloc);
+  INTERCEPT_FUNCTION(aligned_alloc);
+  INTERCEPT_FUNCTION(posix_memalign);
+
+  INTERCEPT_FUNCTION(memset);
+  INTERCEPT_FUNCTION(wmemset);
+  INTERCEPT_FUNCTION(memmove);
+  INTERCEPT_FUNCTION(wmemmove);
+  INTERCEPT_FUNCTION(memcpy);
+  INTERCEPT_FUNCTION(wmemcpy);
+
+  INTERCEPT_FUNCTION(strdup);
+  INTERCEPT_FUNCTION(wcsdup);
+  INTERCEPT_FUNCTION(strndup);
+  INTERCEPT_FUNCTION(stpcpy);
+  INTERCEPT_FUNCTION(wcpcpy);
+  INTERCEPT_FUNCTION(strcpy);
+  INTERCEPT_FUNCTION(wcscpy);
+  INTERCEPT_FUNCTION(strncpy);
+  INTERCEPT_FUNCTION(strcat);
+  INTERCEPT_FUNCTION(wcscat);
+  INTERCEPT_FUNCTION(strncat);
+  INTERCEPT_FUNCTION(strxfrm);
+
+  INTERCEPT_FUNCTION(strfry);
+  INTERCEPT_FUNCTION(strsep);
+  INTERCEPT_FUNCTION(strtok);
+
+  Initialized = true;
+}
+} // end namespace __nsan
diff --git a/compiler-rt/lib/nsan/nsan_platform.h b/compiler-rt/lib/nsan/nsan_platform.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan_platform.h
@@ -0,0 +1,144 @@
+//===------------------------ nsan_platform.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Platform specific information for NSan.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef NSAN_PLATFORM_H
+#define NSAN_PLATFORM_H
+
+namespace __nsan {
+
+// NSan uses two regions of memory to store information:
+// - 'shadow memory' stores the shadow copies of numerical values stored in
+//   application memory.
+// - 'shadow types' is used to determine which value type each byte of memory
+//   belongs to. This makes sure that we always know whether a shadow value is
+//   valid. Shadow values may be tampered with via accesses through pointers of
+//   other types (type punning). Each byte stores:
+//   - bit 1-0: whether the corresponding value is of unknown (00),
+//     float (01), double (10), or long double (11) type.
+//   - bit 5-2: the index of this byte in the value, or 0000 if type is
+//     unknown.
+//   This allows handling unaligned float load/stores by checking that a load
+//   with a given alignment corresponds to the alignment of the store.
+//   Any store of a non-floating point type invalidates the corresponding
+//   bytes, so that subsequent overlapping loads (aligned or not) know that
+//   the corresponding shadow value is no longer valid.
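+//
+//   For example, a float stored at address A is encoded in the shadow types
+//   region as the four bytes 0b000001, 0b000101, 0b001001, 0b001101 at
+//   [A, A+4): type bits 01 (float) with byte indices 0 through 3 in bits 5-2.
+//   This is what __nsan_dump_shadow_mem prints as `f0 f1 f2 f3` in the tests
+//   further down in this patch.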
+
+// On Linux/x86_64, memory is laid out as follows:
+//
+// +--------------------+ 0x800000000000 (top of memory)
+// | application memory |
+// +--------------------+ 0x700000008000 (kAppAddr)
+// |                    |
+// |       unused       |
+// |                    |
+// +--------------------+ 0x400000000000 (kUnusedAddr)
+// |   shadow memory    |
+// +--------------------+ 0x200000000000 (kShadowAddr)
+// |    shadow types    |
+// +--------------------+ 0x100000000000 (kTypesAddr)
+// | reserved by kernel |
+// +--------------------+ 0x000000000000
+//
+//
+// To derive a shadow memory address from an application memory address,
+// bits 44-46 are cleared to bring the address into the range
+// [0x000000000000,0x100000000000). We scale to account for the fact that a
+// shadow value takes twice as much space as the original value.
+// Then we add kShadowAddr to put the shadow relative offset into the shadow
+// memory. See getShadowAddrFor().
+// The process is similar for the shadow types.
+
+// The ratio of app to shadow memory.
+enum {
+  kShadowScale = 2
+};
+
+// The original value type of a byte in app memory. Uses LLVM terminology:
+// https://llvm.org/docs/LangRef.html#floating-point-types
+// FIXME: support half and bfloat.
+enum ValueType {
+  kUnknownValueType = 0,
+  kFloatValueType = 1,  // LLVM float, shadow type double.
+  kDoubleValueType = 2, // LLVM double, shadow type fp128.
+  kFp80ValueType = 3,   // LLVM x86_fp80, shadow type fp128.
+};
+
+// The size of ValueType encoding, in bits.
+enum {
+  kValueSizeSizeBits = 2,
+};
+
+#if defined(__x86_64__)
+struct Mapping {
+  // FIXME: kAppAddr == 0x700000000000 ?
+  static const uptr kAppAddr = 0x700000008000;
+  static const uptr kUnusedAddr = 0x400000000000;
+  static const uptr kShadowAddr = 0x200000000000;
+  static const uptr kTypesAddr = 0x100000000000;
+  static const uptr kShadowMask = ~0x700000000000;
+};
+#else
+# error "NSan not supported for this platform!"
+#endif
+
+enum MappingType {
+  MAPPING_APP_ADDR,
+  MAPPING_UNUSED_ADDR,
+  MAPPING_SHADOW_ADDR,
+  MAPPING_TYPES_ADDR,
+  MAPPING_SHADOW_MASK
+};
+
+template <typename Mapping, int Type>
+uptr MappingImpl() {
+  switch (Type) {
+  case MAPPING_APP_ADDR: return Mapping::kAppAddr;
+  case MAPPING_UNUSED_ADDR: return Mapping::kUnusedAddr;
+  case MAPPING_SHADOW_ADDR: return Mapping::kShadowAddr;
+  case MAPPING_TYPES_ADDR: return Mapping::kTypesAddr;
+  case MAPPING_SHADOW_MASK: return Mapping::kShadowMask;
+  }
+}
+
+template <int Type>
+uptr MappingArchImpl() {
+  return MappingImpl<Mapping, Type>();
+}
+
+ALWAYS_INLINE
+uptr AppAddr() {
+  return MappingArchImpl<MAPPING_APP_ADDR>();
+}
+
+ALWAYS_INLINE
+uptr UnusedAddr() {
+  return MappingArchImpl<MAPPING_UNUSED_ADDR>();
+}
+
+ALWAYS_INLINE
+uptr ShadowAddr() {
+  return MappingArchImpl<MAPPING_SHADOW_ADDR>();
+}
+
+ALWAYS_INLINE
+uptr TypesAddr() {
+  return MappingArchImpl<MAPPING_TYPES_ADDR>();
+}
+
+ALWAYS_INLINE
+uptr ShadowMask() {
+  return MappingArchImpl<MAPPING_SHADOW_MASK>();
+}
+
+} // end namespace __nsan
+
+#endif
diff --git a/compiler-rt/lib/nsan/nsan_stats.h b/compiler-rt/lib/nsan/nsan_stats.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan_stats.h
@@ -0,0 +1,92 @@
+//===-- nsan_stats.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+// NSan statistics.
+// This class counts the number of checks per code location, and is used to
+// output statistics (typically when using
+// `disable_warnings=1,enable_check_stats=1,enable_warning_stats=1`).
+//===----------------------------------------------------------------------===//
+
+#ifndef NSAN_STATS_H
+#define NSAN_STATS_H
+
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+
+namespace __nsan {
+
+enum class CheckTypeT {
+  kUnknown = 0,
+  kRet,
+  kArg,
+  kLoad,
+  kStore,
+  kInsert,
+  kUser, // User initiated.
+  kFcmp,
+  kMaxCheckType,
+};
+
+class Stats {
+public:
+  Stats();
+  ~Stats();
+
+  // Signal that we checked the instruction at the given address.
+  void addCheck(CheckTypeT CheckType, __sanitizer::uptr PC,
+                __sanitizer::uptr BP, double RelErr);
+  // Signal that we warned for the instruction at the given address.
+  void addWarning(CheckTypeT CheckType, __sanitizer::uptr PC,
+                  __sanitizer::uptr BP, double RelErr);
+
+  // Signal that we detected a floating-point load where the shadow type was
+  // invalid.
+  void addInvalidLoadTrackingEvent(__sanitizer::uptr PC, __sanitizer::uptr BP);
+  // Signal that we detected a floating-point load where the shadow type was
+  // unknown but the value was nonzero.
+  void addUnknownLoadTrackingEvent(__sanitizer::uptr PC, __sanitizer::uptr BP);
+
+  void print() const;
+
+private:
+  using IndexMap = __sanitizer::AddrHashMap<__sanitizer::uptr, 11>;
+
+  struct CheckAndWarningsValue {
+    CheckTypeT CheckTy;
+    __sanitizer::u32 StackId = 0;
+    __sanitizer::u64 NumChecks = 0;
+    __sanitizer::u64 NumWarnings = 0;
+    // This is a bitcasted double. Doubles have the nice property that the bit
+    // patterns of non-negative values are ordered like the values themselves,
+    // so they can be compared as ints.
+    double MaxRelativeError = 0;
+  };
+  // Maps key(CheckType, StackId) to indices in CheckAndWarnings.
+  IndexMap CheckAndWarningsMap;
+  __sanitizer::InternalMmapVectorNoCtor<CheckAndWarningsValue> CheckAndWarnings;
+  mutable __sanitizer::BlockingMutex CheckAndWarningsMutex;
+
+  struct LoadTrackingValue {
+    CheckTypeT CheckTy;
+    __sanitizer::u32 StackId = 0;
+    __sanitizer::u64 NumInvalid = 0;
+    __sanitizer::u64 NumUnknown = 0;
+  };
+  // Maps key(CheckTypeT::kLoad, StackId) to indices in TrackedLoads.
+  IndexMap LoadTrackingMap;
+  __sanitizer::InternalMmapVectorNoCtor<LoadTrackingValue> TrackedLoads;
+  mutable __sanitizer::BlockingMutex TrackedLoadsMutex;
+};
+
+extern Stats *nsan_stats;
+void initializeStats();
+
+} // namespace __nsan
+
+#endif // NSAN_STATS_H
diff --git a/compiler-rt/lib/nsan/nsan_stats.cc b/compiler-rt/lib/nsan/nsan_stats.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan_stats.cc
@@ -0,0 +1,161 @@
+//===-- nsan_stats.cc -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
+//
+// NumericalStabilitySanitizer statistics.
+//===----------------------------------------------------------------------===//
+
+#include "nsan/nsan_stats.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+namespace __nsan {
+
+using namespace __sanitizer;
+
+Stats::Stats() {
+  CheckAndWarnings.Initialize(0);
+  TrackedLoads.Initialize(0);
+}
+
+Stats::~Stats() { Printf("deleting nsan stats\n"); }
+
+static uptr key(CheckTypeT CheckType, u32 StackId) {
+  return static_cast<uptr>(CheckType) +
+         StackId * static_cast<uptr>(CheckTypeT::kMaxCheckType);
+}
+
+template <typename MapT, typename VectorT, typename Fn>
+void UpdateEntry(CheckTypeT CheckTy, uptr PC, uptr BP, MapT *Map,
+                 VectorT *Vector, BlockingMutex *Mutex, Fn F) {
+  BufferedStackTrace Stack;
+  Stack.Unwind(PC, BP, nullptr, false);
+  u32 StackId = StackDepotPut(Stack);
+  typename MapT::Handle Handle(Map, key(CheckTy, StackId));
+  BlockingMutexLock Lock(Mutex);
+  if (Handle.created()) {
+    typename VectorT::value_type Entry;
+    Entry.StackId = StackId;
+    Entry.CheckTy = CheckTy;
+    F(Entry);
+    // Remember the index of the new entry so that later lookups through the
+    // map find it.
+    *Handle = Vector->size();
+    Vector->push_back(Entry);
+  } else {
+    auto &Entry = (*Vector)[*Handle];
+    F(Entry);
+  }
+}
+
+void Stats::addCheck(CheckTypeT CheckTy, uptr PC, uptr BP, double RelErr) {
+  UpdateEntry(CheckTy, PC, BP, &CheckAndWarningsMap, &CheckAndWarnings,
+              &CheckAndWarningsMutex, [RelErr](CheckAndWarningsValue &Entry) {
+                ++Entry.NumChecks;
+                if (RelErr > Entry.MaxRelativeError) {
+                  Entry.MaxRelativeError = RelErr;
+                }
+              });
+}
+
+void Stats::addWarning(CheckTypeT CheckTy, uptr PC, uptr BP, double RelErr) {
+  UpdateEntry(CheckTy, PC, BP, &CheckAndWarningsMap, &CheckAndWarnings,
+              &CheckAndWarningsMutex, [RelErr](CheckAndWarningsValue &Entry) {
+                ++Entry.NumWarnings;
+                if (RelErr > Entry.MaxRelativeError) {
+                  Entry.MaxRelativeError = RelErr;
+                }
+              });
+}
+
+void Stats::addInvalidLoadTrackingEvent(uptr PC, uptr BP) {
+  UpdateEntry(CheckTypeT::kLoad, PC, BP, &LoadTrackingMap, &TrackedLoads,
+              &TrackedLoadsMutex,
+              [](LoadTrackingValue &Entry) { ++Entry.NumInvalid; });
+}
+
+void Stats::addUnknownLoadTrackingEvent(uptr PC, uptr BP) {
+  UpdateEntry(CheckTypeT::kLoad, PC, BP, &LoadTrackingMap, &TrackedLoads,
+              &TrackedLoadsMutex,
+              [](LoadTrackingValue &Entry) { ++Entry.NumUnknown; });
+}
+
+static const char *CheckTypeDisplay(CheckTypeT CheckType) {
+  switch (CheckType) {
+  case CheckTypeT::kUnknown:
+    return "unknown";
+  case CheckTypeT::kRet:
+    return "return";
+  case CheckTypeT::kArg:
+    return "argument";
+  case CheckTypeT::kLoad:
+    return "load";
+  case CheckTypeT::kStore:
+    return "store";
+  case CheckTypeT::kInsert:
+    return "vector insert";
+  case CheckTypeT::kUser:
+    return "user-initiated";
+  case CheckTypeT::kFcmp:
+    return "fcmp";
+  case CheckTypeT::kMaxCheckType:
+    return "[max]";
+  }
+  assert(false && "unknown CheckType case");
+  return "";
+}
+
+void Stats::print() const {
+  {
+    BlockingMutexLock Lock(&CheckAndWarningsMutex);
+    for (const auto &Entry : CheckAndWarnings) {
+      Printf("warned %llu times out of %llu %s checks ", Entry.NumWarnings,
+             Entry.NumChecks, CheckTypeDisplay(Entry.CheckTy));
+      if (Entry.NumWarnings > 0) {
+        char RelErrBuf[64];
+        snprintf(RelErrBuf, sizeof(RelErrBuf) - 1, "%f",
+                 Entry.MaxRelativeError * 100.0);
+        Printf("(max relative error: %s%%) ", RelErrBuf);
+      }
+      Printf("at:\n");
+      StackDepotGet(Entry.StackId).Print();
+    }
+  }
+
+  {
+    BlockingMutexLock Lock(&TrackedLoadsMutex);
+    u64 TotalInvalidLoadTracking = 0;
+    u64 TotalUnknownLoadTracking = 0;
+    for (const auto &Entry : TrackedLoads) {
+      TotalInvalidLoadTracking += Entry.NumInvalid;
+      TotalUnknownLoadTracking += Entry.NumUnknown;
+      Printf("invalid/unknown type for %llu/%llu loads at:\n",
+             Entry.NumInvalid, Entry.NumUnknown);
+      StackDepotGet(Entry.StackId).Print();
+    }
+    Printf(
+        "There were %llu/%llu floating-point loads where the shadow type was "
+        "invalid/unknown.\n",
+        TotalInvalidLoadTracking, TotalUnknownLoadTracking);
+  }
+}
+
+ALIGNED(64) static char StatsPlaceholder[sizeof(Stats)];
+Stats *nsan_stats = nullptr;
+
+void initializeStats() { nsan_stats = new (StatsPlaceholder) Stats(); }
+
+} // namespace __nsan
diff --git a/compiler-rt/lib/nsan/nsan_suppressions.h b/compiler-rt/lib/nsan/nsan_suppressions.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan_suppressions.h
@@ -0,0 +1,31 @@
+//===-- nsan_suppressions.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines nsan suppression rules.
+//===----------------------------------------------------------------------===//
+
+#ifndef NSAN_SUPPRESSIONS_H
+#define NSAN_SUPPRESSIONS_H
+
+#include "sanitizer_common/sanitizer_suppressions.h"
+
+namespace __nsan {
+
+extern const char *const kSuppressionNone;
+extern const char *const kSuppressionFcmp;
+extern const char *const kSuppressionConsistency;
+
+void InitializeSuppressions();
+
+__sanitizer::Suppression *
+GetSuppressionForStack(const __sanitizer::StackTrace *Stack,
+                       const char *SupprType);
+
+} // namespace __nsan
+
+#endif
diff --git a/compiler-rt/lib/nsan/nsan_suppressions.cc b/compiler-rt/lib/nsan/nsan_suppressions.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/nsan_suppressions.cc
@@ -0,0 +1,76 @@
+//===-- nsan_suppressions.cc ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "nsan_suppressions.h"
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+
+#include "nsan_flags.h"
+
+// Can be overridden in the frontend.
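+// A suppressions file uses the common sanitizer format, one `type:pattern`
+// rule per line, matched against the module, function, and file names of the
+// warning's stack frames. For example (hypothetical names):
+//   fcmp:LegacyCompare*
+//   consistency:third_party/unstable_lib/*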
+SANITIZER_WEAK_DEFAULT_IMPL +const char *__nsan_default_suppressions() { return 0; } + +namespace __nsan { + +const char* const kSuppressionNone = "none"; +const char* const kSuppressionFcmp = "fcmp"; +const char* const kSuppressionConsistency = "consistency"; + +using namespace __sanitizer; + +ALIGNED(64) static char SuppressionPlaceholder[sizeof(SuppressionContext)]; +static SuppressionContext *SuppressionCtx = nullptr; +static const char *kSuppressionTypes[] = {kSuppressionFcmp, + kSuppressionConsistency}; + +void InitializeSuppressions() { + CHECK_EQ(nullptr, SuppressionCtx); + SuppressionCtx = new (SuppressionPlaceholder) + SuppressionContext(kSuppressionTypes, ARRAY_SIZE(kSuppressionTypes)); + SuppressionCtx->ParseFromFile(flags().suppressions); + SuppressionCtx->Parse(__nsan_default_suppressions()); +} + +static Suppression *GetSuppressionForAddr(uptr Addr, const char *SupprType) { + Suppression *S = nullptr; + + // Suppress by module name. + SuppressionContext *Suppressions = SuppressionCtx; + if (const char *ModuleName = + Symbolizer::GetOrInit()->GetModuleNameForPc(Addr)) { + if (Suppressions->Match(ModuleName, SupprType, &S)) + return S; + } + + // Suppress by file or function name. + SymbolizedStack *Frames = Symbolizer::GetOrInit()->SymbolizePC(Addr); + for (SymbolizedStack *Cur = Frames; Cur; Cur = Cur->next) { + if (Suppressions->Match(Cur->info.function, SupprType, &S) || + Suppressions->Match(Cur->info.file, SupprType, &S)) { + break; + } + } + Frames->ClearAll(); + return S; +} + +Suppression *GetSuppressionForStack(const StackTrace *Stack, + const char *SupprType) { + for (uptr I = 0, E = Stack->size; I < E; I++) { + Suppression *S = GetSuppressionForAddr( + StackTrace::GetPreviousInstructionPc(Stack->trace[I]), SupprType); + if (S) + return S; + } + return nullptr; +} + +} // end namespace __nsan diff --git a/compiler-rt/lib/nsan/tests/CMakeLists.txt b/compiler-rt/lib/nsan/tests/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/compiler-rt/lib/nsan/tests/CMakeLists.txt @@ -0,0 +1,50 @@ +include(CompilerRTCompile) + +set(NSAN_UNITTEST_CFLAGS + ${COMPILER_RT_UNITTEST_CFLAGS} + ${COMPILER_RT_GTEST_CFLAGS} + -I${COMPILER_RT_SOURCE_DIR}/lib/ + -O2 + -g + -fno-omit-frame-pointer) + +file(GLOB NSAN_HEADERS ../*.h) +set(NSAN_UNITTESTS + NSanUnitTest.cpp) + +add_custom_target(NsanUnitTests) +set_target_properties(NsanUnitTests PROPERTIES FOLDER "Compiler-RT Tests") + +# set(NSAN_UNITTEST_LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS} -ldl) +# list(APPEND NSAN_UNITTEST_LINK_FLAGS --driver-mode=g++) + +if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST NSAN_SUPPORTED_ARCH) + # NSan unit tests are only run on the host machine. 
+  set(arch ${COMPILER_RT_DEFAULT_TARGET_ARCH})
+
+  set(NSAN_TEST_RUNTIME RTNsanTest.${arch})
+
+  set(NSAN_TEST_RUNTIME_OBJECTS
+    $<TARGET_OBJECTS:RTNsan.${arch}>
+    $<TARGET_OBJECTS:RTInterception.${arch}>
+    $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+    $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>)
+
+  add_library(${NSAN_TEST_RUNTIME} STATIC
+    ${NSAN_TEST_RUNTIME_OBJECTS})
+
+  set_target_properties(${NSAN_TEST_RUNTIME} PROPERTIES
+    ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+    FOLDER "Compiler-RT Runtime tests")
+
+  set(NsanTestObjects)
+  generate_compiler_rt_tests(NsanTestObjects
+    NsanUnitTests "Nsan-${arch}-Test" ${arch}
+    SOURCES ${NSAN_UNITTESTS} ${COMPILER_RT_GTEST_SOURCE}
+    RUNTIME ${NSAN_TEST_RUNTIME}
+    DEPS gtest ${NSAN_HEADERS}
+    CFLAGS ${NSAN_UNITTEST_CFLAGS}
+    LINK_FLAGS ${NSAN_UNITTEST_LINK_FLAGS})
+  set_target_properties(NsanUnitTests PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
diff --git a/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp b/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp
new file mode 100644
--- /dev/null
+++ b/compiler-rt/lib/nsan/tests/NSanUnitTest.cpp
@@ -0,0 +1,67 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Do not attempt to use LLVM ostream etc from gtest.
+#define GTEST_NO_LLVM_SUPPORT 1
+
+#include "nsan.h"
+#include "gtest/gtest.h"
+
+#include <math.h>
+
+namespace __nsan {
+
+template <typename FT, FT (*next)(FT, FT)> void TestFT() {
+  // Basic local tests anchored at 0.0.
+  ASSERT_EQ(getULPDiff<FT>(0.0, 0.0), 0);
+  ASSERT_EQ(getULPDiff<FT>(-0.0, 0.0), 0);
+  ASSERT_EQ(getULPDiff<FT>(next(-0.0, -1.0), 0.0), 1);
+  ASSERT_EQ(getULPDiff<FT>(next(0.0, 1.0), -0.0), 1);
+  ASSERT_EQ(getULPDiff<FT>(next(-0.0, -1.0), next(0.0, 1.0)), 2);
+  // Basic local tests anchored at 2.0.
+  ASSERT_EQ(getULPDiff<FT>(next(2.0, 1.0), 2.0), 1);
+  ASSERT_EQ(getULPDiff<FT>(next(2.0, 3.0), 2.0), 1);
+  ASSERT_EQ(getULPDiff<FT>(next(2.0, 1.0), next(2.0, 3.0)), 2);
+
+  ASSERT_NE(getULPDiff<FT>(-0.01, 0.01), kMaxULPDiff);
+
+  // Basic local tests anchored at a random number.
+  const FT X = 4863.5123;
+  const FT To = 2 * X;
+  FT Y = X;
+  ASSERT_EQ(getULPDiff<FT>(X, Y), 0);
+  ASSERT_EQ(getULPDiff<FT>(-X, -Y), 0);
+  Y = next(Y, To);
+  ASSERT_EQ(getULPDiff<FT>(X, Y), 1);
+  ASSERT_EQ(getULPDiff<FT>(-X, -Y), 1);
+  Y = next(Y, To);
+  ASSERT_EQ(getULPDiff<FT>(X, Y), 2);
+  ASSERT_EQ(getULPDiff<FT>(-X, -Y), 2);
+  Y = next(Y, To);
+  ASSERT_EQ(getULPDiff<FT>(X, Y), 3);
+  ASSERT_EQ(getULPDiff<FT>(-X, -Y), 3);
+
+  // Values with larger differences.
+  static constexpr const __sanitizer::u64 MantissaSize =
+      __sanitizer::u64{1} << FTInfo<FT>::kMantissaBits;
+  ASSERT_EQ(getULPDiff<FT>(1.0, next(2.0, 1.0)), MantissaSize - 1);
+  ASSERT_EQ(getULPDiff<FT>(1.0, 2.0), MantissaSize);
+  ASSERT_EQ(getULPDiff<FT>(1.0, next(2.0, 3.0)), MantissaSize + 1);
+  ASSERT_EQ(getULPDiff<FT>(1.0, 3.0), (3 * MantissaSize) / 2);
+}
+
+TEST(NSanTest, Float) { TestFT<float, nextafterf>(); }
+
+TEST(NSanTest, Double) {
+  TestFT<double, static_cast<double (*)(double, double)>(nextafter)>();
+}
+
+TEST(NSanTest, Float128) {
+  // Very basic tests. FIXME: improve when we have nextafter<__float128>.
+  ASSERT_EQ(getULPDiff<__float128>(0.0, 0.0), 0);
+  ASSERT_EQ(getULPDiff<__float128>(-0.0, 0.0), 0);
+  ASSERT_NE(getULPDiff<__float128>(-0.01, 0.01), kMaxULPDiff);
+}
+
+} // end namespace __nsan
diff --git a/compiler-rt/test/nsan/CMakeLists.txt b/compiler-rt/test/nsan/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(NSAN_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
+set(NSAN_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
+
+set(NSAN_TESTSUITES)
+
+set(NSAN_UNITTEST_DEPS)
+set(NSAN_TEST_DEPS
+  ${SANITIZER_COMMON_LIT_TEST_DEPS}
+  nsan)
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  )
+
+foreach(arch ${NSAN_SUPPORTED_ARCH})
+  set(NSAN_TEST_TARGET_ARCH ${arch})
+  string(TOLOWER "-${arch}" NSAN_TEST_CONFIG_SUFFIX)
+  get_test_cc_for_arch(${arch} NSAN_TEST_TARGET_CC NSAN_TEST_TARGET_CFLAGS)
+  string(TOUPPER ${arch} ARCH_UPPER_CASE)
+  set(CONFIG_NAME ${ARCH_UPPER_CASE}${OS_NAME}Config)
+
+  configure_lit_site_cfg(
+    ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py)
+  list(APPEND NSAN_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME})
+endforeach()
+
+add_lit_testsuite(check-nsan "Running the NSan tests"
+  ${NSAN_TESTSUITES}
+  DEPENDS ${NSAN_TEST_DEPS})
+set_target_properties(check-nsan PROPERTIES FOLDER "Compiler-RT Misc")
diff --git a/compiler-rt/test/nsan/alloca.cc b/compiler-rt/test/nsan/alloca.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/alloca.cc
@@ -0,0 +1,22 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+#include <stddef.h>
+
+#include "helpers.h"
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+int main() {
+  int size = 3 * sizeof(float);
+  // Make sure we allocate dynamically: https://godbolt.org/z/T3h998.
+  DoNotOptimize(size);
+  float *array = reinterpret_cast<float *>(__builtin_alloca(size));
+  DoNotOptimize(array);
+  array[0] = 1.0;
+  array[1] = 2.0;
+  // The third float is uninitialized.
+  __nsan_dump_shadow_mem((const char *)array, 3 * sizeof(float), 16, 0);
+  // CHECK: {{.*}} f0 f1 f2 f3 f0 f1 f2 f3 __ __ __ __ (1.00000000000000000000) (2.00000000000000000000)
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex1.cc b/compiler-rt/test/nsan/cadna_ex1.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex1.cc
@@ -0,0 +1,20 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=0 %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// http://cadna.lip6.fr/Examples_Dir/ex1.php
+// This checks that nsan can detect basic cancellations.
+
+#include <stdio.h>
+
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex1.f
+__attribute__((noinline)) void Ex1(double x, double y) {
+  printf("P(%f,%f) = %f\n", x, y, 9.0*x*x*x*x - y*y*y*y + 2.0*y*y);
+  // CHECK: #0 {{.*}} in Ex1{{.*}}[[@LINE-1]]
+}
+
+int main() {
+  Ex1(10864.0, 18817.0);
+  // CHECK: #1 {{.*}} in main{{.*}}[[@LINE-1]]
+  Ex1(1.0 / 3, 2.0 / 3);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex2.cc b/compiler-rt/test/nsan/cadna_ex2.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex2.cc
@@ -0,0 +1,52 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// http://cadna.lip6.fr/Examples_Dir/ex2.php
+// This is an example where nsan fails to detect an issue.
+// Doing the computations in quad instead of double precision does not help
+// in detecting that the computation of the discriminant is unstable: both
+// double and quad precision find it to be positive.
+
+#include <math.h>
+#include <stdio.h>
+
+extern "C" void __nsan_dump_double(double value);
+
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex2.f
+__attribute__((noinline)) void Solve(double a, double b, double c) {
+  if (a == 0) {
+    if (b == 0) {
+      if (c == 0) {
+        printf("Every complex value is solution.\n");
+      } else {
+        printf("There is no solution.\n");
+      }
+    } else {
+      double x1 = -c / b;
+      printf("The equation is degenerated. There is one real solution: %f\n",
+             x1);
+    }
+  } else {
+    b = b / a;
+    c = c / a;
+    double d = b * b - 4.0 * c;
+    __nsan_dump_double(d); // Print the discriminant shadow value.
+    if (d == 0.0) {
+      double x1 = -b * 0.5;
+      printf("Discriminant is zero. The double solution is %f\n", x1);
+    } else if (d > 0) {
+      double x1 = (-b - sqrt(d)) * 0.5;
+      double x2 = (-b + sqrt(d)) * 0.5;
+      printf("There are two real solutions. x1 = %f x2 = %f\n", x1, x2);
+    } else {
+      double x1 = -b * 0.5;
+      double x2 = sqrt(-d) * 0.5;
+      printf("There are two complex solutions. z1 = %f %f z2 = %f %f\n", x1, x2,
+             x1, -x2);
+    }
+  }
+}
+
+int main() {
+  Solve(0.3, -2.1, 3.675);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex3.cc b/compiler-rt/test/nsan/cadna_ex3.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex3.cc
@@ -0,0 +1,50 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// http://cadna.lip6.fr/Examples_Dir/ex3.php
+// The determinant of Hilbert's matrix (11x11) without pivoting strategy is
+// computed. After triangularization, the determinant is the product of the
+// diagonal elements.
+// Although the algorithm suffers from loss of precision, it is stable, and
+// nsan does not warn.
+
+#include <stdio.h>
+
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex3.f
+int main() {
+  constexpr const int kN = 11;
+  double amat[kN][kN];
+  for (int i = 0; i < kN; ++i) {
+    for (int j = 0; j < kN; ++j) {
+      // Hilbert's matrix is defined by: a(i,j) = 1/(i+j+1),
+      // where i and j are zero-based.
+      amat[i][j] = 1.0 / (i + j + 1);
+      printf("%.3f, ", amat[i][j]);
+    }
+    printf("\n");
+  }
+  printf("\n");
+
+  double det = 1.0;
+  for (int i = 0; i < kN - 1; ++i) {
+    printf("Pivot number %2i = %f\n", i, amat[i][i]);
+    det = det * amat[i][i];
+    const double aux = 1.0 / amat[i][i];
+    for (int j = i + 1; j < kN; ++j) {
+      amat[i][j] = amat[i][j] * aux;
+    }
+
+    for (int j = i + 1; j < kN; ++j) {
+      const double aux = amat[j][i];
+      for (int k = i + 1; k < kN; ++k) {
+        amat[j][k] = amat[j][k] - aux * amat[i][k];
+      }
+    }
+  }
+
+  constexpr const int kLastElem = kN - 1;
+  const double last_pivot = amat[kLastElem][kLastElem];
+  printf("Pivot number %2i = %f\n", kLastElem, last_pivot);
+  det = det * last_pivot;
+  printf("Determinant = %.12g\n", det);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex4.cc b/compiler-rt/test/nsan/cadna_ex4.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex4.cc
@@ -0,0 +1,37 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// http://cadna.lip6.fr/Examples_Dir/ex4.php
+// This example was proposed by J.-M. Muller [1]. The first 25 iterations of
+// the following recurrent sequence are computed:
+//   U(n+1) = 111 - 1130/U(n) + 3000/(U(n)*U(n-1))
+// with U(0) = 5.5 and U(1) = 61/11.
+// The exact value for the limit is 6.
+// [1] J.-M. Muller, "Arithmetique des ordinateurs", Ed. Masson, 1987.
+//
+// This checks that nsan correctly detects the instability.
+
+#include <stdio.h>
+
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex4.f
+__attribute__((noinline)) // Prevent constant folding.
+void
+Ex4(double u_n_minus_1, double u_n, const int end_iter) {
+  for (int i = 3; i < end_iter; ++i) {
+    const double u_n_plus_1 =
+        111.0 - 1130.0 / u_n + 3000.0 / (u_n * u_n_minus_1);
+    u_n_minus_1 = u_n;
+    u_n = u_n_plus_1;
+    printf("U(%i) = %f\n", i, u_n);
+// CHECK: #0{{.*}}in Ex4{{.*}}cadna_ex4.cc:[[@LINE-1]]
+  }
+}
+
+int main() {
+  constexpr const double kU1 = 5.5;
+  constexpr const double kU2 = 61.0 / 11.0;
+  constexpr const int kEndIter = 25;
+  Ex4(kU1, kU2, kEndIter);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex5.cc b/compiler-rt/test/nsan/cadna_ex5.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex5.cc
@@ -0,0 +1,95 @@
+// RUN: %clangxx_nsan -O0 -DFN=Unstable -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=UNSTABLE < %t.out
+
+// RUN: %clangxx_nsan -O2 -DFN=Unstable -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=UNSTABLE < %t.out
+
+// RUN: %clangxx_nsan -O0 -DFN=StableRel -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// RUN: %clangxx_nsan -O2 -DFN=StableRel -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// RUN: %clangxx_nsan -O0 -DFN=StableEq -mllvm -nsan-truncate-fcmp-eq=true -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// RUN: %clangxx_nsan -O2 -DFN=StableEq -mllvm -nsan-truncate-fcmp-eq=true -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// RUN: %clangxx_nsan -O0 -DFN=StableEq -mllvm -nsan-truncate-fcmp-eq=false -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=STABLEEQ-NOTRUNCATE < %t.out
+
+// RUN: %clangxx_nsan -O2 -DFN=StableEq -mllvm -nsan-truncate-fcmp-eq=false -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s --check-prefix=STABLEEQ-NOTRUNCATE < %t.out
+
+// http://cadna.lip6.fr/Examples_Dir/ex5.php
+// This program computes a root of the polynomial
+//   f(x) = 1.47*x**3 + 1.19*x**2 - 1.83*x + 0.45
+// using Newton's method.
+// The sequence is initialized by x = 0.5.
+// The iterative algorithm `x(n+1) = x(n) - f(x(n))/f'(x(n))` is stopped by the
+// criterion |x(n)-x(n-1)| <= 1.0e-12.
+//
+// The first algorithm is inherently unstable; this checks that nsan detects
+// the issue with the unstable code and does not trigger on the stabilized
+// version.
+
+#include <math.h>
+#include <stdio.h>
+
+constexpr const double kEpsilon = 1e-12;
+constexpr const int kNMax = 100;
+
+// The unstable version.
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex5.f
+__attribute__((noinline)) // Prevent constant folding.
+void Unstable(double y) {
+  double x;
+  int i;
+  for (i = 1; i < kNMax; ++i) {
+    x = y;
+    y = x - (1.47 * x * x * x + 1.19 * x * x - 1.83 * x + 0.45) /
+            (4.41 * x * x + 2.38 * x - 1.83);
+    if (fabs(x - y) < kEpsilon) break;
+// UNSTABLE: #0{{.*}}in Unstable{{.*}}cadna_ex5.cc:[[@LINE-1]]
+  }
+
+  printf("x(%i) = %g\n", i - 1, x);
+  printf("x(%i) = %g\n", i, y);
+}
+
+// The stabilized version, where the termination criterion is an equality
+// comparison. The equality is considered unstable or not by nsan depending on
+// the value of --nsan-truncate-fcmp-eq.
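+// (Presumably, with -nsan-truncate-fcmp-eq=true the shadow operands of an
+// equality comparison are first rounded back to the application type before
+// being compared, so `x == y` answers consistently in both domains; with
+// =false the comparison runs at full shadow precision and may diverge, which
+// is what the STABLEEQ-NOTRUNCATE RUN lines above exercise.)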
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex5_cad_opt.f
+__attribute__((noinline)) // Prevent constant folding.
+void StableEq(double y) {
+  double x;
+  int i;
+  for (i = 1; i < kNMax; ++i) {
+    x = y;
+    y = ((4.2*x + 3.5)*x + 1.5)/(6.3*x + 6.1);
+    if (x == y) break;
+// STABLEEQ-NOTRUNCATE: #0{{.*}}in StableEq{{.*}}cadna_ex5.cc:[[@LINE-1]]
+  }
+
+  printf("x(%i) = %g\n", i - 1, x);
+  printf("x(%i) = %g\n", i, y);
+}
+
+// The stabilized version, where the termination criterion is a relative
+// comparison. This is a more stable fix of `Unstable`.
+__attribute__((noinline)) // Prevent constant folding.
+void StableRel(double y) {
+  double x;
+  int i;
+  for (i = 1; i < kNMax; ++i) {
+    x = y;
+    y = ((4.2*x + 3.5)*x + 1.5)/(6.3*x + 6.1);
+    if (fabs(x - y) < kEpsilon) break;
+  }
+
+  printf("x(%i) = %g\n", i - 1, x);
+  printf("x(%i) = %g\n", i, y);
+}
+
+int main() {
+  FN(0.5);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex6.cc b/compiler-rt/test/nsan/cadna_ex6.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex6.cc
@@ -0,0 +1,67 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t
+
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t
+
+// http://cadna.lip6.fr/Examples_Dir/ex6.php
+// The following linear system is solved with the Gaussian elimination method
+// with partial pivoting.
+//
+// This test checks that nsan detects the instability.
+
+#include <algorithm>
+#include <math.h>
+#include <stdio.h>
+
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex6.f
+int main() {
+  constexpr const int kDim = 4;
+  constexpr const int kDim1 = 5;
+
+  float xsol[kDim] = {1.0, 1.0, 1.e-8, 1.0};
+  float a[kDim][kDim1] = {
+      {21.0, 130.0, 0.0, 2.1, 153.1},
+      {13.0, 80.0, 4.74e+8, 752.0, 849.74},
+      {0.0, -0.4, 3.9816e+8, 4.2, 7.7816},
+      {0.0, 0.0, 1.7, 9.0e-9, 2.6e-8},
+  };
+
+  for (int i = 0; i < kDim - 1; ++i) {
+    float pmax = 0.0;
+    int ll = i;
+    for (int j = i; j < kDim; ++j) {
+      const float a_j_i = a[j][i];
+      if (fabsf(a_j_i) > pmax) {
+        pmax = fabsf(a_j_i);
+        ll = j;
+      }
+    }
+
+    if (ll != i) {
+      for (int j = i; j < kDim1; ++j) {
+        std::swap(a[i][j], a[ll][j]);
+      }
+    }
+
+    const float a_i_i = a[i][i];
+    for (int j = i + 1; j < kDim1; ++j) {
+      a[i][j] = a[i][j] / a_i_i;
+    }
+
+    for (int k = i + 1; k < kDim; ++k) {
+      const float a_k_i = a[k][i];
+      for (int j = i + 1; j < kDim1; ++j) {
+        a[k][j] = a[k][j] - a_k_i * a[i][j];
+      }
+    }
+  }
+
+  a[kDim - 1][kDim1 - 1] = a[kDim - 1][kDim1 - 1] / a[kDim - 1][kDim - 1];
+  for (int i = kDim - 2; i >= 0; --i) {
+    for (int j = i + 1; j < kDim; ++j) {
+      a[i][kDim1 - 1] = a[i][kDim1 - 1] - a[i][j] * a[j][kDim1 - 1];
+    }
+  }
+  for (int i = 0; i < kDim; ++i) {
+    printf("x_sol[%i] = %g (true value : %g)\n", i, a[i][kDim1 - 1], xsol[i]);
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cadna_ex7.cc b/compiler-rt/test/nsan/cadna_ex7.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cadna_ex7.cc
@@ -0,0 +1,110 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=0,log2_max_relative_error=0 %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=STOP %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=0,log2_max_relative_error=0 %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=STOP %s < %t.out
+
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=REL %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=REL %s < %t.out
+
+// http://cadna.lip6.fr/Examples_Dir/ex7.php
+// This program solves a linear system of order 20 by using Jacobi's method.
+// The stopping criterion is
+//   || X(n+1) - X(n) || <= eps
+// where ||X|| is the maximum norm and eps=0.0001.
+//
+// This tests that nsan catches two types of errors:
+//  - The first one is that the stopping criterion is not stable w.r.t. the
+//    precision (STOP). To show this we disable relative error
+//    checking and only let the fcmp checker detect the unstable branching.
+//  - The second one is that the computations are unstable anyway from the
+//    first iteration (REL).
+
+#include <math.h>
+#include <stdio.h>
+
+// Adapted from Fortran: http://cadna.lip6.fr/Examples_Dir/source/ex7.f
+
+float random1() {
+  static int nrand = 23;
+  nrand = (nrand * 5363 + 143) % 1387;
+  return 2.0 * nrand / 1387.0 - 1.0;
+}
+
+int main() {
+  constexpr const float kEpsilon = 1e-4;
+  constexpr const int kNDims = 20;
+  constexpr const int kNIters = 1000;
+
+  float a[kNDims][kNDims];
+  float b[kNDims];
+  float x[kNDims];
+  float y[kNDims];
+  const float xsol[kNDims] = {
+      1.7,    -4746.89, 50.23, -245.32,  4778.29,  -75.73, 3495.43,
+      4.35,   452.98,   -2.76, 8239.24,  3.46,     1000.0, -5.0,
+      3642.4, 735.36,   1.7,   -2349.17, -8247.52, 9843.57,
+  };
+
+  for (int i = 0; i < kNDims; ++i) {
+    for (int j = 0; j < kNDims; ++j) {
+      a[i][j] = random1();
+    }
+    a[i][i] = a[i][i] + 4.9213648f;
+  }
+
+  for (int i = 0; i < kNDims; ++i) {
+    float aux = 0.0f;
+    for (int j = 0; j < kNDims; ++j) {
+      aux = aux + a[i][j]*xsol[j];
+    }
+    b[i] = aux;
+    y[i] = 10.0f;
+  }
+
+  int iter = 0;
+  for (iter = 0; iter < kNIters; ++iter) {
+    float anorm = 0.0f;
+    for (int j = 0; j < kNDims; ++j) {
+      x[j] = y[j];
+    }
+    for (int j = 0; j < kNDims; ++j) {
+      float aux = b[j];
+      for (int k = 0; k < kNDims; ++k) {
+        if (k != j) {
+          aux = aux - a[j][k]*x[k];
+        }
+      }
+// REL: WARNING: NumericalStabilitySanitizer: inconsistent shadow
+// Note: We are not checking the line because nsan detects the issue at the
+// `y[j]=` store location in debug mode, and at the `abs()` location in release
+// mode because the store is optimized out.
+      y[j] = aux / a[j][j];
+
+// STOP: WARNING: NumericalStabilitySanitizer: floating-point comparison results depend on precision
+// STOP: #0{{.*}}in main{{.*}}cadna_ex7.cc:[[@LINE+1]]
+      if (fabsf(x[j]-y[j]) > anorm) {
+        anorm = fabsf(x[j]-y[j]);
+      }
+    }
+    printf("iter = %i\n", iter);
+// STOP: WARNING: NumericalStabilitySanitizer: floating-point comparison results depend on precision
+// STOP: #0{{.*}}in main{{.*}}cadna_ex7.cc:[[@LINE+1]]
+    if (anorm < kEpsilon) break;
+  }
+
+  printf("niter = %i\n", iter);
+  for (int i = 0; i < kNDims; ++i) {
+    float aux = -b[i];
+    for (int j = 0; j < kNDims; ++j) {
+      aux = aux + a[i][j]*y[j];
+    }
+    printf("x_sol(%2i) = %15.7f (true value : %15.7f), residue(%2i) = %15.7f\n",
+           i, y[i], xsol[i], i, aux);
+  }
+
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cancellation_fn_ptr.cc b/compiler-rt/test/nsan/cancellation_fn_ptr.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cancellation_fn_ptr.cc
@@ -0,0 +1,65 @@
+// RUN: %clangxx_nsan -O0 -g -DFN=Cube %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O1 -g -DFN=Cube %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DFN=Cube %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O0 -g -DFN=Square %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DFN=Square %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O0 -g -DFN=Inverse %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DFN=Inverse %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// Computes the derivative of x -> fn(x) using a finite difference
+// approximation:
+//   f'(a) = (f(a + da) - f(a)) / da
+// https://en.wikipedia.org/wiki/Numerical_differentiation#Finite_differences
+// Numerical differentiation is a well-known case of numerical instability.
+// It typically leads to cancellation errors and division issues as `da`
+// approaches zero.
+// This is similar to `cancellation_libm.cc`, but this variant uses a function
+// pointer to a user-defined function instead of a libm function.
+
+#include <math.h>
+#include <stdio.h>
+#define xstr(s) str(s)
+#define str(s) #s
+
+static float Square(float x) {
+  return x * x;
+}
+
+static float Cube(float x) {
+  return x * x * x;
+}
+
+static float Inverse(float x) {
+  return 1.0f / x;
+}
+
+__attribute__((noinline)) // To check call stack reporting.
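+// (Rough numbers, for intuition: with a = 0.1f and da = 2^-20, fn(a + da) and
+// fn(a) agree in roughly their 20 leading mantissa bits, so the float
+// subtraction keeps only about 4 of float's 24 significand bits, while the
+// double-precision shadow keeps about 33 of its 53; the two results then
+// diverge by far more than the admissible relative error.)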
+float ComputeDerivative(float (*fn)(float), float a, float da) {
+  return (fn(a + da) - fn(a)) / da;
+  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking return
+  // CHECK: float {{ *}}precision (native):
+  // CHECK: double{{ *}}precision (shadow):
+  // CHECK: {{#0 .*in ComputeDerivative}}
+}
+
+int main() {
+  for (int i = 7; i < 31; ++i) {
+    float step = 1.0f / (1ull << i);
+    printf("%s derivative: %.8f\n", xstr(FN), ComputeDerivative(&FN, 0.1f, step));
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cancellation_libm.cc b/compiler-rt/test/nsan/cancellation_libm.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cancellation_libm.cc
@@ -0,0 +1,51 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O1 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// NOTE: -fno-math-errno allows clang to emit an intrinsic.
+
+// RUN: %clangxx_nsan -O0 -g %s -o %t -fno-math-errno && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O1 -g %s -o %t -fno-math-errno && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g0 %s -o %t -fno-math-errno && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// Computes the derivative of x -> expf(x) using a finite difference
+// approximation:
+//   f'(a) = (f(a + da) - f(a)) / da
+// https://en.wikipedia.org/wiki/Numerical_differentiation#Finite_differences
+// Numerical differentiation is a well-known case of numerical instability.
+// It typically leads to cancellation errors and division issues as `da`
+// approaches zero.
+
+#include <math.h>
+#include <stdio.h>
+
+// Note that expf is not instrumented, so we cannot detect the numerical
+// discrepancy if we do not recognize intrinsics.
+__attribute__((noinline)) // To check call stack reporting.
+float ComputeDerivative(float a, float da) {
+  return (expf(a + da) - expf(a)) / da;
+  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking return
+  // CHECK: float {{ *}}precision (native):
+  // CHECK: double{{ *}}precision (shadow):
+  // CHECK: {{#0 .*in ComputeDerivative}}
+}
+
+int main() {
+  for (int i = 1; i < 31; ++i) {
+    const float step = 1.0f / (1ull << i);
+    printf("derivative (step %f):\n", step);
+    printf("  %.8f\n", ComputeDerivative(0.1f, step));
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/cancellation_ok.cc b/compiler-rt/test/nsan/cancellation_ok.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/cancellation_ok.cc
@@ -0,0 +1,53 @@
+// RUN: %clangxx_nsan -O0 -g -DIMPL=Naive -mllvm -nsan-instrument-fcmp=0 %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+// RUN: %clangxx_nsan -O2 -g -DIMPL=Naive -mllvm -nsan-instrument-fcmp=0 %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+// RUN: %clangxx_nsan -O0 -g -DIMPL=Better1 -mllvm -nsan-instrument-fcmp=0 %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+// RUN: %clangxx_nsan -O2 -g -DIMPL=Better1 -mllvm -nsan-instrument-fcmp=0 %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+// RUN: %clangxx_nsan -O0 -g -DIMPL=Better2 -mllvm -nsan-instrument-fcmp=0 %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+// RUN: %clangxx_nsan -O2 -g -DIMPL=Better2 -mllvm -nsan-instrument-fcmp=0 %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// This tests a few cancellations from the implementations of the function
+// presented in https://people.eecs.berkeley.edu/~wkahan/JAVAhurt.pdf, page 27.
+// All three functions have varying degrees of cancellation, none of which
+// lead to catastrophic errors.
+
+#include <math.h>
+#include <stdio.h>
+
+// This never loses more than 1/2 of the digits.
+__attribute__((noinline)) static double Naive(const double X) {
+  double Y, Z;
+  Y = X - 1.0;
+  Z = exp(Y);
+  if (Z != 1.0)
+    Z = Y / (Z - 1.0);
+  return Z;
+}
+
+__attribute__((noinline)) static double Better1(const double X) {
+  long double Y, Z;
+  Y = X - 1.0;
+  Z = exp(Y);
+  if (Z != 1.0)
+    Z = Y / (Z - 1.0);
+  return Z;
+}
+
+// This is precise to a few ulps.
+__attribute__((noinline)) static double Better2(const double X) {
+  double Y, Z;
+  Y = X - 1.0;
+  Z = exp(Y);
+  if (Z != 1.0)
+    Z = log(Z) / (Z - 1.0);
+  return Z;
+}
+
+int main() {
+  for (int i = 7; i < 31; ++i) {
+    const double x = 1.0 + 1.0 / (1ull << i);
+    printf("value at %.16f:\n", x);
+    printf("  %.16f\n", IMPL(x));
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/compare.cc b/compiler-rt/test/nsan/compare.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/compare.cc
@@ -0,0 +1,28 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This test checks that we warn when a floating-point comparison takes
+// different values in the application and shadow domain.
+
+#include <math.h>
+#include <stdio.h>
+
+// 0.6/0.2 is slightly below 3, so the comparison will fail after a certain
+// threshold that depends on the precision of the computation.
+__attribute__((noinline)) // To check call stack reporting.
+bool DoCmp(double a, double b, double c, double threshold) {
+  return c - a / b < threshold;
+  // CHECK: WARNING: NumericalStabilitySanitizer: floating-point comparison results depend on precision
+  // CHECK: double {{ *}}precision dec (native): {{.*}}<{{.*}}
+  // CHECK: __float128{{ *}}precision dec (shadow): {{.*}}<{{.*}}
+  // CHECK: {{#0 .*in DoCmp}}
+}
+
+int main() {
+  double threshold = 1.0;
+  for (int i = 0; i < 60; ++i) {
+    threshold /= 2;
+    printf("value at threshold %.20f: %i\n", threshold,
+           DoCmp(0.6, 0.2, 3.0, threshold));
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/compute_pi.cc b/compiler-rt/test/nsan/compute_pi.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/compute_pi.cc
@@ -0,0 +1,45 @@
+// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DRECURRENCE=Good %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=10 %run %t
+
+// RUN: %clangxx_nsan -O1 -mllvm -nsan-shadow-type-mapping=dqq -g -DRECURRENCE=Good %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=10 %run %t
+
+// RUN: %clangxx_nsan -O2 -mllvm -nsan-shadow-type-mapping=dqq -g0 -DRECURRENCE=Good %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=10 %run %t
+
+// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DRECURRENCE=Bad %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=10 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O1 -mllvm -nsan-shadow-type-mapping=dqq -g -DRECURRENCE=Bad %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=10 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -mllvm -nsan-shadow-type-mapping=dqq -g0 -DRECURRENCE=Bad %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=10 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This is the Archimedes algorithm for computing pi, starting from a hexagon
+// and doubling the number of edges at every iteration.
+// https://en.wikipedia.org/wiki/Floating-point_arithmetic#Minimizing_the_effect_of_accuracy_problems
+
+#include <math.h>
+#include <stdio.h>
+
+__attribute__((noinline)) // To check call stack reporting.
+double Bad(double ti) {
+  return (sqrt(ti * ti + 1) - 1) / ti;
+  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results
+  // CHECK: double {{ *}}precision (native):
+  // CHECK: __float128 {{ *}}precision (shadow):
+  // CHECK: {{#0 .*in Bad}}
+}
+
+// This is a better equivalent that does not have the unstable cancellation.
+__attribute__((noinline)) // For consistency.
+double Good(double ti) {
+  return ti / (sqrt(ti * ti + 1) + 1);
+}
+
+int main() {
+  double ti = 1 / sqrt(3); // t0.
+  for (int i = 0; i < 60; ++i) {
+    printf("%2i pi= %.16f\n", i, 6.0 * (1ull << i) * ti);
+    ti = RECURRENCE(ti);
+  }
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/helpers.h b/compiler-rt/test/nsan/helpers.h
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/helpers.h
@@ -0,0 +1,17 @@
+// Prevents the compiler from optimizing everything away.
+template <typename T>
+void DoNotOptimize(const T &var) {
+  asm volatile("" : "+m"(const_cast<T &>(var)));
+}
+
+// Writes a single double with an inconsistent shadow value to *data.
+void CreateInconsistency(double *data) {
+  double num = 0.6;
+  double denom = 0.2;
+  // Prevent the compiler from constant-folding this.
+  DoNotOptimize(num);
+  DoNotOptimize(denom);
+  // Both values are very close to 0.0, but the shadow value is closer.
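+  // Concretely (approximate values): in double, 0.6/0.2 rounds to
+  // 2.9999999999999996, so num/denom - 3.0 is about -4.44e-16 and the stored
+  // result is about -2.25e15, while the extended-precision shadow difference
+  // is about -2.78e-16, giving a shadow result of about -3.6e15.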
+ *data = 1.0 / (num/denom - 3.0); +} diff --git a/compiler-rt/test/nsan/infinity.cc b/compiler-rt/test/nsan/infinity.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/test/nsan/infinity.cc @@ -0,0 +1,25 @@ +// This test case verifies that we handle infinity correctly. + +// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t >%t.out 2>&1 + + +#include +#include + +#include "helpers.h" + +__attribute__((noinline)) // To check call stack reporting. +void StoreInf(double* a) { + DoNotOptimize(a); + double inf = std::numeric_limits::infinity(); + DoNotOptimize(inf); + *a = inf; +} + +int main() { + double d; + StoreInf(&d); + DoNotOptimize(d); + printf("%.16f\n", d); + return 0; +} diff --git a/compiler-rt/test/nsan/intercept_libc_str.cc b/compiler-rt/test/nsan/intercept_libc_str.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/test/nsan/intercept_libc_str.cc @@ -0,0 +1,149 @@ +// RUN: %clangxx_nsan -O2 -g -DFN=StrFry %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRFRY %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrSep %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRSEP %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrTok %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRTOK %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrDup %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRDUP %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrNDup %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRNDUP %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StpCpy %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STPCPY %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrCpy %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRCPY %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrNCpy %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRNCPY %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrCat %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRCAT %s < %t.out + +// RUN: %clangxx_nsan -O2 -g -DFN=StrNCat %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1 +// RUN: FileCheck --check-prefix=STRNCAT %s < %t.out + +// This test case checks libc string operations interception. + +#include +#include +#include + +#include "helpers.h" + +extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes, size_t bytes_per_line, size_t reserved); + +void StrFry(char* const s) { + strfry(s); + __nsan_dump_shadow_mem(s, sizeof(float), sizeof(float), 0); +// strfry just destroys the whole area. +// STRFRY: StrFry +// STRFRY-NEXT: f0 f1 f2 f3 +// STRFRY-NEXT: __ __ __ f3 +} + +void StrSep(char* const s) { + char* sc = s; + strsep(&sc, "\x40"); + __nsan_dump_shadow_mem(s, sizeof(float), sizeof(float), 0); +// strsep destroys the element that was replaced with a null character. 
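+// ("\x40" is '@', which matches the byte at offset 1 of the buffer built in
+// main below, so strsep writes '\0' there and only that shadow byte is
+// invalidated.)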
+// STRSEP: StrSep
+// STRSEP-NEXT: f0 f1 f2 f3
+// STRSEP-NEXT: f0 __ f2 f3
+}
+
+void StrTok(char* const s) {
+  strtok(s, "\x40");
+  __nsan_dump_shadow_mem(s, sizeof(float), sizeof(float), 0);
+// strtok just destroys the whole area except the terminator.
+// STRTOK: StrTok
+// STRTOK-NEXT: f0 f1 f2 f3
+// STRTOK-NEXT: __ __ __ f3
+}
+
+void StrDup(char* const s) {
+  char* const dup = strdup(s);
+  __nsan_dump_shadow_mem(dup, 4, 4, 0);
+  free(dup);
+// STRDUP: StrDup
+// STRDUP-NEXT: f0 f1 f2 f3
+// STRDUP-NEXT: f0 f1 f2 __
+}
+
+void StrNDup(char* const s) {
+  char* const dup = strndup(s, 2);
+  __nsan_dump_shadow_mem(dup, 3, 3, 0);
+  free(dup);
+// STRNDUP: StrNDup
+// STRNDUP-NEXT: f0 f1 f2 f3
+// STRNDUP-NEXT: f0 f1 __
+}
+
+void StpCpy(char* const s) {
+  char buffer[] = "abcdef\0";
+  stpcpy(buffer, s);
+  __nsan_dump_shadow_mem(buffer, sizeof(buffer), sizeof(buffer), 0);
+// STPCPY: StpCpy
+// STPCPY-NEXT: f0 f1 f2 f3
+// STPCPY-NEXT: f0 f1 f2 __
+}
+
+void StrCpy(char* const s) {
+  char buffer[] = "abcdef\0";
+  strcpy(buffer, s);
+  __nsan_dump_shadow_mem(buffer, sizeof(buffer), sizeof(buffer), 0);
+// STRCPY: StrCpy
+// STRCPY-NEXT: f0 f1 f2 f3
+// STRCPY-NEXT: f0 f1 f2 __
+}
+
+void StrNCpy(char* const s) {
+  char buffer[] = "abcdef\0";
+  strncpy(buffer, s, 2);
+  __nsan_dump_shadow_mem(buffer, sizeof(buffer), sizeof(buffer), 0);
+// STRNCPY: StrNCpy
+// STRNCPY-NEXT: f0 f1 f2 f3
+// STRNCPY-NEXT: f0 f1 __
+}
+
+void StrCat(char* const s) {
+  char buffer[] = "abcd\0  ";
+  strcat(buffer, s);
+  __nsan_dump_shadow_mem(buffer, sizeof(buffer), sizeof(buffer), 0);
+// STRCAT: StrCat
+// STRCAT-NEXT: f0 f1 f2 f3
+// STRCAT-NEXT: __ __ __ __ f0 f1 f2 __
+}
+
+void StrNCat(char* const s) {
+  char buffer[] = "abcd\0 ";
+  strncat(buffer, s, 2);
+  __nsan_dump_shadow_mem(buffer, sizeof(buffer), sizeof(buffer), 0);
+// STRNCAT: StrNCat
+// STRNCAT-NEXT: f0 f1 f2 f3
+// STRNCAT-NEXT: __ __ __ __ f0 f1 __
+}
+
+int main() {
+  // This has binary representation 0x00804020, which in memory (little-endian)
+  // is {0x20,0x40,0x80,0x00}.
+  float f = 1.17779472238e-38f;
+  DoNotOptimize(f);
+  char buffer[sizeof(float)];
+  memcpy(buffer, &f, sizeof(float));
+  printf("{0x%x, 0x%x, 0x%x, 0x%x}\n",
+         (unsigned char)buffer[0], (unsigned char)buffer[1],
+         (unsigned char)buffer[2], (unsigned char)buffer[3]);
+#define str(s) #s
+#define xstr(s) str(s)
+  puts(xstr(FN));
+  __nsan_dump_shadow_mem(buffer, sizeof(float), sizeof(float), 0);
+  FN(buffer);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/intercept_libc_wstr.cc b/compiler-rt/test/nsan/intercept_libc_wstr.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/intercept_libc_wstr.cc
@@ -0,0 +1,80 @@
+// RUN: %clangxx_nsan -O2 -g -DFN=WcsDup %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=WCSDUP %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DFN=WcpCpy %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=WCPCPY %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DFN=WcsCpy %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=WCSCPY %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DFN=WcsCat %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck --check-prefix=WCSCAT %s < %t.out
+
+// This test case checks libc wide string operations interception.
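+// (In the shadow dumps below, "dN" means the N-th byte of a double-typed
+// shadow, "fN" the N-th byte of a float-typed shadow, and "__" an invalid or
+// unknown shadow byte; since wchar_t is 4 bytes here, one double spans two
+// wide characters.)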
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cwchar>
+
+#include "helpers.h"
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+void WcsDup(wchar_t *const s) {
+  wchar_t *const dup = wcsdup(s);
+  __nsan_dump_shadow_mem(reinterpret_cast<const char *>(dup), 8, 8, 0);
+  free(dup);
+  // WCSDUP: WcsDup
+  // WCSDUP-NEXT: d0 d1 d2 d3 d4 d5 d6 d7
+  // WCSDUP-NEXT: d0 d1 d2 d3 __ __ __ __
+}
+
+void WcpCpy(wchar_t *const s) {
+  wchar_t buffer[] = L"abc\0";
+  wcpcpy(buffer, s);
+  __nsan_dump_shadow_mem(reinterpret_cast<const char *>(buffer), sizeof(buffer),
+                         sizeof(buffer), 0);
+  // WCPCPY: WcpCpy
+  // WCPCPY-NEXT: d0 d1 d2 d3 d4 d5 d6 d7
+  // WCPCPY-NEXT: d0 d1 d2 d3 __ __ __ __
+}
+
+void WcsCpy(wchar_t *const s) {
+  wchar_t buffer[] = L"abc\0";
+  wcscpy(buffer, s);
+  __nsan_dump_shadow_mem(reinterpret_cast<const char *>(buffer), sizeof(buffer),
+                         sizeof(buffer), 0);
+  // WCSCPY: WcsCpy
+  // WCSCPY-NEXT: d0 d1 d2 d3 d4 d5 d6 d7
+  // WCSCPY-NEXT: d0 d1 d2 d3 __ __ __ __
+}
+
+void WcsCat(wchar_t *const s) {
+  wchar_t buffer[] = L"a\0 ";
+  wcscat(buffer, s);
+  __nsan_dump_shadow_mem(reinterpret_cast<const char *>(buffer), sizeof(buffer),
+                         sizeof(buffer), 0);
+  // WCSCAT: WcsCat
+  // WCSCAT-NEXT: d0 d1 d2 d3 d4 d5 d6 d7
+  // WCSCAT-NEXT: __ __ __ __ d0 d1 d2 d3 __ __ __ __
+}
+
+int main() {
+  // This has binary representation 0x0000000080402010, which in memory
+  // (little-endian) is {0x10,0x20,0x40,0x80,0x00,0x00,0x00,0x00}.
+  double f = 1.0630742122880717462525516679E-314;
+  DoNotOptimize(f);
+  wchar_t buffer[sizeof(double) / sizeof(wchar_t)];
+  memcpy(buffer, &f, sizeof(double));
+  static_assert(sizeof(wchar_t) == 4, "not implemented");
+  printf("{0x%x, 0x%x}\n", buffer[0], buffer[1]);
+#define str(s) #s
+#define xstr(s) str(s)
+  puts(xstr(FN));
+  __nsan_dump_shadow_mem(reinterpret_cast<const char *>(buffer), sizeof(double),
+                         sizeof(double), 0);
+  FN(buffer);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/interface_dump_shadow_mem.cc b/compiler-rt/test/nsan/interface_dump_shadow_mem.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/interface_dump_shadow_mem.cc
@@ -0,0 +1,62 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -fno-builtin -O2 -g -mllvm -nsan-shadow-type-mapping=dqq %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+// RUN: %clangxx_nsan -fno-builtin -O2 -g -mllvm -nsan-shadow-type-mapping=dlq %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This test checks that the sanitizer interface function
+// `__nsan_dump_shadow_mem` works correctly.
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+int main() {
+  char buffer[64];
+  int pos = 0;
+  // One aligned float.
+  const float f = 42.0;
+  memcpy(&(buffer[pos]), &f, sizeof(f));
+  pos += sizeof(f);
+  // One 4-byte aligned double.
+  const double d = 35.0;
+  memcpy(&(buffer[pos]), &d, sizeof(d));
+  pos += sizeof(d);
+  // Three uninitialized bytes.
+  pos += 3;
+  // One char byte.
+  buffer[pos] = 'a';
+  pos += 1;
+  // One long double.
+  const long double l = 0.0000000001;
+  memcpy(&(buffer[pos]), &l, sizeof(l));
+  pos += sizeof(l);
+  // One more double, but erase bytes in the middle.
+  const double d2 = 53.0;
+  memcpy(&(buffer[pos]), &d2, sizeof(d2));
+  pos += sizeof(d2);
+  uint32_t i = 5;
+  memcpy(&(buffer[pos - 5]), &i, sizeof(i));
+  // And finally two consecutive floats.
+  const float f2 = 43.0;
+  memcpy(&(buffer[pos]), &f2, sizeof(f2));
+  pos += sizeof(f2);
+  const float f3 = 44.0;
+  memcpy(&(buffer[pos]), &f3, sizeof(f3));
+
+  __nsan_dump_shadow_mem(buffer, sizeof(buffer), 8, 0);
+// CHECK: 0x{{[a-f0-9]*}}: f0 f1 f2 f3 d0 d1 d2 d3 (42.00000000000000000000)
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d4 d5 d6 d7 __ __ __ __ (35.00000000000000000000)
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: l0 l1 l2 l3 l4 l5 l6 l7
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: l8 l9 la lb lc ld le lf (0.00000000010000000000)
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d0 d1 d2 f0 f1 f2 f3 d7
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: f0 f1 f2 f3 f0 f1 f2 f3 (43.00000000000000000000) (44.00000000000000000000)
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: __ __ __ __ __ __ __ __
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: __ __ __ __ __ __ __ __
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/jmmuller.cc b/compiler-rt/test/nsan/jmmuller.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/jmmuller.cc
@@ -0,0 +1,35 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t
+
+// RUN: %clangxx_nsan -O1 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t
+
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t
+
+// This tests J-M Müller's Kahan Challenge:
+// http://arith22.gforge.inria.fr/slides/06-gustafson.pdf
+//
+// The problem is to evaluate `H` at 15, 16, 17, and 9999. The correct
+// answer is (1,1,1,1).
+// Note that in this case, even though the shadow computation in quad mode is
+// also wrong, the inconsistency check shows that there is an issue.
+
+#include <cmath>
+#include <cstdio>
+
+double E(double z) {
+  return z == 0.0 ? 1.0 : (exp(z) - 1.0) / z;
+}
+
+double Q(double x) {
+  return fabs(x - sqrt(x * x + 1)) - 1 / (x + sqrt(x * x + 1));
+}
+
+__attribute__((noinline)) // Do not constant-fold.
+double H(double x) { return E(Q(x * x)); }
+
+int main() {
+  constexpr const double kX[] = {15.0, 16.0, 17.0, 9999.0};
+  printf("(H(%f), H(%f), H(%f), H(%f)) = (%.8f, %.8f, %.8f, %.8f)\n",
+         kX[0], kX[1], kX[2], kX[3],
+         H(kX[0]), H(kX[1]), H(kX[2]), H(kX[3]));
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/lit.cfg.py b/compiler-rt/test/nsan/lit.cfg.py
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/lit.cfg.py
@@ -0,0 +1,45 @@
+# -*- Python -*-
+
+import os
+
+# Setup config name.
+config.name = 'NSan' + config.name_suffix
+
+# Setup source root.
+config.test_source_root = os.path.dirname(__file__)
+
+# Test suffixes.
+config.suffixes = ['.c', '.cc', '.test']
+
+# C & CXX flags.
+c_flags = ([config.target_cflags])
+
+# Android doesn't want -lrt.
+if not config.android:
+  c_flags += ["-lrt"]
+
+cxx_flags = (c_flags + config.cxx_mode_flags + ["-std=c++17"])
+
+nsan_flags = ["-fsanitize=numerical", "-g",
+              "-mno-omit-leaf-frame-pointer",
+              "-fno-omit-frame-pointer"]
+
+def build_invocation(compile_flags):
+  return " " + " ".join([config.clang] + compile_flags) + " "
+
+# Add substitutions.
+config.substitutions.append(("%clang ", build_invocation(c_flags)))
+config.substitutions.append(("%clang_nsan ", build_invocation(c_flags + nsan_flags)))
+config.substitutions.append(("%clangxx_nsan ", build_invocation(cxx_flags + nsan_flags)))
+
+# Platform-specific default NSAN_OPTIONS for lit tests.
+default_nsan_options = ''
+
+config.environment['NSAN_OPTIONS'] = default_nsan_options
+default_nsan_options += ':'
+config.substitutions.append(('%env_nsan_options=',
+                             'env NSAN_OPTIONS=' + default_nsan_options))
+
+# NSan tests are currently supported on Linux only.
+if config.host_os not in ['Linux']:
+  config.unsupported = True
diff --git a/compiler-rt/test/nsan/lit.site.cfg.py.in b/compiler-rt/test/nsan/lit.site.cfg.py.in
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/lit.site.cfg.py.in
@@ -0,0 +1,11 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.name_suffix = "@NSAN_TEST_CONFIG_SUFFIX@"
+config.target_arch = "@NSAN_TEST_TARGET_ARCH@"
+config.target_cflags = "@NSAN_TEST_TARGET_CFLAGS@"
+
+# Load common config for all compiler-rt lit tests.
+lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured")
+
+# Load tool-specific config that would do the real work.
+lit_config.load_config(config, "@NSAN_LIT_SOURCE_DIR@/lit.cfg.py")
diff --git a/compiler-rt/test/nsan/memcpy.cc b/compiler-rt/test/nsan/memcpy.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/memcpy.cc
@@ -0,0 +1,83 @@
+// This test case verifies that we can track shadow memory values across
+// explicit or implicit calls to memcpy.
+
+// RUN: %clangxx_nsan -O2 -g -DIMPL=OpEq %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DIMPL=Memcpy %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// RUN: %clangxx_nsan -O2 -g -DIMPL=MemcpyInline %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <memory>
+#include <vector>
+
+#include "helpers.h"
+
+class OpEq {
+ public:
+  double* data() const { return data_.get(); }
+
+  OpEq() = default;
+  OpEq(const OpEq& other) {
+    *data_ = *other.data_;
+  }
+
+ private:
+  std::unique_ptr<double> data_ = std::make_unique<double>();
+};
+
+class Memcpy {
+ public:
+  double* data() const { return data_.get(); }
+
+  Memcpy() = default;
+  Memcpy(const Memcpy& other) {
+    auto size = sizeof(double);
+    DoNotOptimize(size);  // Prevent the compiler from optimizing this to a load-store.
+    memcpy(data_.get(), other.data_.get(), size);
+  }
+
+ private:
+  std::unique_ptr<double> data_ = std::make_unique<double>();
+};
+
+class MemcpyInline {
+ public:
+  double* data() const { return data_.get(); }
+
+  MemcpyInline() = default;
+  MemcpyInline(const MemcpyInline& other) {
+    __builtin_memcpy(data_.get(), other.data_.get(), sizeof(double));
+  }
+
+ private:
+  std::unique_ptr<double> data_ = std::make_unique<double>();
+};
+
+class Vector : public std::vector<double> {
+ public:
+  Vector() : std::vector<double>(1) {}
+};
+
+int main() {
+  using Impl = IMPL;
+  Impl src;
+  CreateInconsistency(src.data());
+  DoNotOptimize(src);
+  // We first verify that an incorrect value has been generated in the original
+  // data location.
+  printf("%.16f\n", *src.data());
+  // CHECK: #0{{.*}}in main{{.*}}memcpy.cc:[[@LINE-1]]
+  Impl dst(src);
+  DoNotOptimize(dst);
+  // If we correctly carried the shadow value across the copy, this warns
+  // again.
+ printf("%.16f\n", *dst.data()); + // CHECK: #0{{.*}}in main{{.*}}memcpy.cc:[[@LINE-1]] + return 0; +} diff --git a/compiler-rt/test/nsan/memset_nonzero.cc b/compiler-rt/test/nsan/memset_nonzero.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/test/nsan/memset_nonzero.cc @@ -0,0 +1,23 @@ +// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g %s -o %t && NSAN_OPTIONS=halt_on_error=1,enable_loadtracking_stats=1,print_stats_on_exit=1 %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include "helpers.h" + +#include +#include + +// This tests tracking of loads where the application value has been set to +// a non-zero value in a untyped way (e.g. memset). +// nsan resumes by re-extending the original value, and logs the event to stats. +// Also see `memset_zero.cc`. + +int main() { + double* d = new double(2.0); + printf("%.16f\n", *d); + DoNotOptimize(d); + memset(d, 0x55, sizeof(double)); + DoNotOptimize(d); + printf("%.16f\n", *d); +// CHECK: There were 0/1 floating-point loads where the shadow type was invalid/unknown. + return 0; +} diff --git a/compiler-rt/test/nsan/memset_zero.cc b/compiler-rt/test/nsan/memset_zero.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/test/nsan/memset_zero.cc @@ -0,0 +1,24 @@ +// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g %s -o %t && NSAN_OPTIONS=halt_on_error=1,enable_loadtracking_stats=1,print_stats_on_exit=1 %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +#include "helpers.h" + +#include +#include + +// This tests tracking of loads where the application value has been set to zero +// in a untyped way (e.g. memset). +// nsan resumes by re-extending the original value, without logging. +// Also see `memset_nonzero.cc`. Zero is special because application typically +// initialize large buffers to zero in an untyped way. + +int main() { + double* d = new double(2.0); + printf("%.16f\n", *d); + DoNotOptimize(d); + memset(d, 0, sizeof(double)); + DoNotOptimize(d); + printf("%.16f\n", *d); +// CHECK: There were 0/0 floating-point loads where the shadow type was invalid/unknown. + return 0; +} diff --git a/compiler-rt/test/nsan/rumps_royal_pain.cc b/compiler-rt/test/nsan/rumps_royal_pain.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/test/nsan/rumps_royal_pain.cc @@ -0,0 +1,37 @@ +// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// RUN: %clangxx_nsan -O1 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s < %t.out + +// This tests Rump’s Royal Pain: +// http://arith22.gforge.inria.fr/slides/06-gustafson.pdf +// +// The problem is to evaluate `RumpsRoyalPain(77617, 33096)`. The exact value is +// –0.82739605994682136. Note that in this case, even though the shadow +// computation in quad mode is nowhere near the correct value, the inconsistency +// check shows that there is an issue. + +#include +#include + +__attribute__((noinline)) // Do not constant-fold. 
+double
+RumpsRoyalPain(double x, double y) {
+  return 333.75 * pow(y, 6) +
+         pow(x, 2) *
+             (11 * pow(x, 2) * pow(y, 2) - pow(y, 6) - 121 * pow(y, 4) - 2) +
+         5.5 * pow(y, 8) + x / (2 * y);
+  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking return
+  // CHECK: {{#0 .*in RumpsRoyalPain}}
+}
+
+int main() {
+  constexpr const double kX = 77617;
+  constexpr const double kY = 33096;
+  printf("RumpsRoyalPain(%f, %f)=%.8f\n", kX, kY, RumpsRoyalPain(kX, kY));
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/simd.cc b/compiler-rt/test/nsan/simd.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/simd.cc
@@ -0,0 +1,25 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=0,resume_after_warning=false %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This tests vector (simd) sanitization.
+
+#include <cstdio>
+#include <emmintrin.h>
+
+#include "helpers.h"
+
+int main() {
+  double in;
+  CreateInconsistency(&in);
+  __m128d v = _mm_set1_pd(in);
+  DoNotOptimize(in);
+  double v2[2];
+  _mm_storeu_pd(v2, v);
+  // CHECK:{{.*}}inconsistent shadow results while checking store to address
+  // CHECK: #0{{.*}}in main{{.*}}[[@LINE-2]]
+  DoNotOptimize(v2);
+  printf("%f\n", v2[0]);
+  // CHECK:{{.*}}inconsistent shadow results while checking call argument #1
+  // CHECK: #0{{.*}}in main{{.*}}[[@LINE-2]]
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/smooth_surprise.cc b/compiler-rt/test/nsan/smooth_surprise.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/smooth_surprise.cc
@@ -0,0 +1,40 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// RUN: %clangxx_nsan -O1 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 %run %t
+
+// This tests Kahan's Smooth Surprise:
+// http://arith22.gforge.inria.fr/slides/06-gustafson.pdf
+//   log(|3(1-x)+1|)/80 + x^2 + 1
+//
+// This implementation using floats consistently gives the wrong answer, and
+// this cannot be caught by nsan, because the issue here is not the numerical
+// instability of the computations (`SmoothSurprise` is stable), but the density
+// of the floats.
+
+#include <cmath>
+#include <cstdio>
+#include <limits>
+
+double SmoothSurprise(double x) {
+  return log(fabs(3 * (1 - x) + 1)) / 80.0 + x * x + 1;
+}
+
+int main() {
+  double x_min = 0.0;
+  double y_min = std::numeric_limits<double>::max();
+  constexpr const double kStart = 0.8;
+  constexpr const double kEnd = 2.0;
+  constexpr const int kNumSteps = 500000; // Half a million.
+  for (int i = 0; i < kNumSteps; ++i) {
+    const double x = kStart + (i * (kEnd - kStart)) / kNumSteps;
+    const double y = SmoothSurprise(x);
+    if (y < y_min) {
+      x_min = x;
+      y_min = y;
+    }
+  }
+  printf("Minimum at x=%.8f (f(x)=%.8f)\n", x_min, y_min);
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/stable_sort.cc b/compiler-rt/test/nsan/stable_sort.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/stable_sort.cc
@@ -0,0 +1,52 @@
+// RUN: %clangxx_nsan -fno-builtin -O2 -g %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This tests a particularly hard case of memory tracking. stable_sort does
+// conditional swaps of pairs of elements with mixed types (int/double).
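+// The difficulty is (roughly) that the sort moves each pair's bytes with
+// untyped loads and stores, so the double's shadow has to follow the raw
+// bytes rather than a typed floating-point store; the shadow dumps below
+// verify that the shadow values end up swapped along with the application
+// values.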
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdio>
+#include <utility>
+#include <vector>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+__attribute__((noinline)) void Run(std::vector<int> &indices,
+                                   std::vector<double> &values) {
+  const auto num_entries = indices.size();
+  std::vector<std::pair<int, double>> entries;
+  entries.reserve(num_entries);
+  for (size_t i = 0; i < num_entries; ++i) {
+    entries.emplace_back(indices[i], values[i]);
+  }
+  __nsan_dump_shadow_mem((const char *)&entries[0].second,
+                         sizeof(double), sizeof(double), 0);
+  __nsan_dump_shadow_mem((const char *)&entries[1].second,
+                         sizeof(double), sizeof(double), 0);
+  // CHECK: {{.*}}: d0 d1 d2 d3 d4 d5 d6 d7 (1.02800000000000002487)
+  // CHECK-NEXT: {{.*}}: d0 d1 d2 d3 d4 d5 d6 d7 (7.95099999999999962341)
+  std::stable_sort(
+      entries.begin(), entries.end(),
+      [](const std::pair<int, double> &a, const std::pair<int, double> &b) {
+        return a.first < b.first;
+      });
+  __nsan_dump_shadow_mem((const char *)&entries[0].second,
+                         sizeof(double), sizeof(double), 0);
+  __nsan_dump_shadow_mem((const char *)&entries[1].second,
+                         sizeof(double), sizeof(double), 0);
+  // We make sure that the shadow values have been swapped correctly.
+  // CHECK-NEXT: {{.*}}: d0 d1 d2 d3 d4 d5 d6 d7 (7.95099999999999962341)
+  // CHECK-NEXT: {{.*}}: d0 d1 d2 d3 d4 d5 d6 d7 (1.02800000000000002487)
+}
+
+int main() {
+  std::vector<int> indices;
+  std::vector<double> values;
+  indices.push_back(75);
+  values.push_back(1.028);
+  indices.push_back(74);
+  values.push_back(7.951);
+  Run(indices, values);
+}
diff --git a/compiler-rt/test/nsan/stack.cc b/compiler-rt/test/nsan/stack.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/stack.cc
@@ -0,0 +1,18 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+#include <cstddef>
+
+#include "helpers.h"
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+int main() {
+  float array[2];
+  DoNotOptimize(array);
+  array[0] = 1.0;
+  array[1] = 2.0;
+  __nsan_dump_shadow_mem((const char*)array, sizeof(array), 16, 0);
+  // CHECK: {{.*}} f0 f1 f2 f3 f0 f1 f2 f3 (1.00000000000000000000) (2.00000000000000000000)
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/stats.cc b/compiler-rt/test/nsan/stats.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/stats.cc
@@ -0,0 +1,31 @@
+// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g %s -o %t && NSAN_OPTIONS=halt_on_error=0,disable_warnings=1,enable_check_stats=1,enable_warning_stats=1,print_stats_on_exit=1 %run %t >%t.out 2>&1
+// Checked separately because the order is not deterministic.
+// RUN: FileCheck %s --check-prefix=WARNING < %t.out
+// RUN: FileCheck %s --check-prefix=NOWARNING < %t.out
+
+// This tests the "stats" mode of nsan.
+// In this test:
+//  - we do not stop the application on error (halt_on_error=0),
+//  - we disable real-time printing of warnings (disable_warnings=1),
+//  - we enable stats collection (enable_{check,warning}_stats=1),
+//  - we print stats when exiting the application (print_stats_on_exit=1).
+// We then check that the application correctly collected stats about the
+// checks that were done and where those checks resulted in warnings.
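+//
+// For reference, the stats lines matched below look roughly like:
+//   warned 1 times out of 1024 argument checks (max relative error: 42.0%) at
+//   #0 0x123456 in main .../stats.cc:NN
+// (the counts, percentage, and addresses are placeholders).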
+ +#include "helpers.h" + +#include + +int main() { + double d; + CreateInconsistency(&d); + DoNotOptimize(d); + printf("%.16f\n", d); + // WARNING: warned 1 times out of {{[0-9]*}} argument checks (max relative error: + // {{.*}}%) at WARNING-NEXT:#0{{.*}} in main{{.*}}stats.cc:[[@LINE-2]] + d = 42; + printf("%.16f\n", d); + // NOWARNING: warned 0 times out of {{[0-9]*}} argument checks at + // NOWARNING-NEXT:#0{{.*}} in main{{.*}}stats.cc:[[@LINE-2]] + return 0; +} diff --git a/compiler-rt/test/nsan/sums.cc b/compiler-rt/test/nsan/sums.cc new file mode 100644 --- /dev/null +++ b/compiler-rt/test/nsan/sums.cc @@ -0,0 +1,81 @@ +// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefixes=NAIVE,NAIVE-FLOAT < %t.out + +// RUN: %clangxx_nsan -O1 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefixes=NAIVE,NAIVE-FLOAT < %t.out + +// RUN: %clangxx_nsan -O2 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefixes=NAIVE,NAIVE-FLOAT < %t.out +// RUN: %clangxx_nsan -O2 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=double %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=49 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefixes=NAIVE,NAIVE-DOUBLE-QUAD < %t.out +// RUN: %clangxx_nsan -O2 -mllvm -nsan-shadow-type-mapping=dlq -g -DSUM=NaiveSum -DFLT=double %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=49 not %run %t >%t.out 2>&1 +// RUN: FileCheck %s --check-prefixes=NAIVE,NAIVE-DOUBLE-LONG < %t.out + +// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=float %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 %run %t +// RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=float %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 %run %t +// RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=double %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=49 %run %t +// RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dlq -g -DSUM=KahanSum -DFLT=double %s -o %t && NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=49 %run %t + +#include +#include +#include +#include + +// A naive, unstable summation. +template +__attribute__((noinline)) // To check call stack reporting. +T NaiveSum(const std::vector& values) { + T sum = 0; + for (T v : values) { + sum += v; + } + return sum; + // NAIVE: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking return + // NAIVE-FLOAT: float{{ *}}precision (native): + // NAIVE-FLOAT: double{{ *}}precision (shadow): + // NAIVE-DOUBLE-QUAD: double {{ *}}precision (native): + // NAIVE-DOUBLE-QUAD: __float128{{ *}}precision (shadow): + // NAIVE-DOUBLE-LONG: double{{ *}}precision (native): + // NAIVE-DOUBLE-LONG: long double{{ *}}precision (shadow): + // NAIVE: {{#0 .*in .* NaiveSum}} +} + +// Kahan's summation is a numerically stable sum. +// https://en.wikipedia.org/wiki/Kahan_summation_algorithm +template +__attribute__((noinline)) // For consistency. 
+T KahanSum(const std::vector<T>& values) {
+  T sum = 0;
+  T c = 0;
+  for (T v : values) {
+    T y = v - c;
+    T t = sum + y;
+    c = (t - sum) - y;
+    sum = t;
+  }
+  return sum;
+}
+
+int main() {
+  std::vector<FLT> values;
+  constexpr const int kNumValues = 1000000;
+  values.reserve(kNumValues);
+  // Using a seed to avoid flakiness.
+  constexpr uint32_t kSeed = 0x123456;
+  std::mt19937 gen(kSeed);
+  std::uniform_real_distribution<FLT> dis(0.0f, 1000.0f);
+  for (int i = 0; i < kNumValues; ++i) {
+    values.push_back(dis(gen));
+  }
+
+  const auto t1 = std::chrono::high_resolution_clock::now();
+  const auto sum = SUM(values);
+  const auto t2 = std::chrono::high_resolution_clock::now();
+  printf("sum: %.8f\n", sum);
+  std::cout << "runtime: "
+            << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1)
+                       .count() /
+                   1000.0
+            << "ms\n";
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/suppressions.cc b/compiler-rt/test/nsan/suppressions.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/suppressions.cc
@@ -0,0 +1,26 @@
+// RUN: %clangxx_nsan -O2 -g -DIMPL=OpEq %s -o %t
+
+// RUN: rm -f %t.supp
+// RUN: touch %t.supp
+// RUN: NSAN_OPTIONS="halt_on_error=0,resume_after_warning=false,suppressions='%t.supp'" %run %t 2>&1 | FileCheck %s --check-prefixes=NOSUPP
+
+// RUN: echo "consistency:*main*" > %t.supp
+// RUN: NSAN_OPTIONS="halt_on_error=0,resume_after_warning=false,suppressions='%t.supp'" %run %t 2>&1 | FileCheck %s --check-prefixes=SUPP
+
+// This tests sanitizer suppressions, i.e. warning silencing.
+
+#include "helpers.h"
+
+#include <cstdio>
+
+int main() {
+  double d;
+  CreateInconsistency(&d);
+  // NOSUPP: #1{{.*}}[[@LINE-1]]
+  // SUPP-NOT: #1{{.*}}[[@LINE-2]]
+  DoNotOptimize(d);
+  printf("%.16f\n", d);
+  // NOSUPP: #0{{.*}}[[@LINE-1]]
+  // SUPP-NOT: #0{{.*}}[[@LINE-2]]
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/swap.cc b/compiler-rt/test/nsan/swap.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/swap.cc
@@ -0,0 +1,44 @@
+// RUN: %clangxx_nsan -fno-builtin -mllvm -nsan-check-loads -O2 -g2 -UNDEBUG %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This verifies that shadow memory is tracked correctly across typed and
+// bitcasted swaps.
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+__attribute__((noinline)) // prevent optimization
+void SwapFT(double *a, double* b) {
+  // LLVM typically optimizes this to an untyped swap (through i64) anyway.
+  std::swap(*a, *b);
+}
+
+__attribute__((noinline)) // prevent optimization
+void SwapBitcasted(uint64_t *a, uint64_t* b) {
+  std::swap(*a, *b);
+}
+
+int main() {
+  double a = 1.0, b = 2.0;
+  __nsan_dump_shadow_mem((const char*)&a, sizeof(a), sizeof(a), 0);
+  __nsan_dump_shadow_mem((const char*)&b, sizeof(b), sizeof(b), 0);
+  SwapFT(&a, &b);
+  __nsan_dump_shadow_mem((const char*)&a, sizeof(a), sizeof(a), 0);
+  __nsan_dump_shadow_mem((const char*)&b, sizeof(b), sizeof(b), 0);
+  assert(a == 2.0 && b == 1.0);
+  // This breaks strict aliasing but is OK on X86.
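+  // (The bitcasted swap moves the doubles' bytes through integer loads and
+  // stores, so the shadow has to be transferred based on the memory
+  // operations rather than the pointee types for the dumps below to match.)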
+  SwapBitcasted(reinterpret_cast<uint64_t *>(&a), reinterpret_cast<uint64_t *>(&b));
+  __nsan_dump_shadow_mem((const char*)&a, sizeof(a), sizeof(a), 0);
+  __nsan_dump_shadow_mem((const char*)&b, sizeof(b), sizeof(b), 0);
+  assert(a == 1.0 && b == 2.0);
+// CHECK: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (1.0{{.*}}
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (2.0{{.*}}
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (2.0{{.*}}
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (1.0{{.*}}
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (1.0{{.*}}
+// CHECK-NEXT: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (2.0{{.*}}
+}
diff --git a/compiler-rt/test/nsan/type_punning.cc b/compiler-rt/test/nsan/type_punning.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/type_punning.cc
@@ -0,0 +1,26 @@
+// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g %s -o %t && NSAN_OPTIONS=halt_on_error=1,enable_loadtracking_stats=1,print_stats_on_exit=1 %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+#include "helpers.h"
+
+#include <cstdint>
+#include <cstdio>
+#include <memory>
+
+// This tests tracking of loads where the application value has been tampered
+// with through type punning.
+// nsan resumes by re-extending the original value, and logs the failed
+// tracking to stats.
+
+int main() {
+  auto d = std::make_unique<double>(2.0);
+  printf("%.16f\n", *d);
+  DoNotOptimize(d);
+  reinterpret_cast<char *>(d.get())[7] = 0;
+  DoNotOptimize(d);
+  printf("%.16f\n", *d);
+  // CHECK: There were 1/0 floating-point loads where the shadow type was invalid/unknown.
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/uninstrumented_write.cc b/compiler-rt/test/nsan/uninstrumented_write.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/uninstrumented_write.cc
@@ -0,0 +1,22 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=0 %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This tests load checking. Inconsistencies on load can happen when
+// uninstrumented code writes to memory.
+
+#include "helpers.h"
+
+#include <cstdio>
+#include <memory>
+
+int main() {
+  auto d = std::make_unique<double>(2.0);
+  printf("%.16f\n", *d);
+  DoNotOptimize(d);
+  // Sneakily change the sign bit.
+  asm volatile("xorb $0x80, 7(%0)" : : "r"(d.get()));
+  printf("%.16f\n", *d);
+  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking call argument #1
+  // CHECK: {{#0 .*in main}}
+  return 0;
+}
diff --git a/compiler-rt/test/nsan/vector_push_back.cc b/compiler-rt/test/nsan/vector_push_back.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/vector_push_back.cc
@@ -0,0 +1,17 @@
+// RUN: %clangxx_nsan -fno-builtin -O2 -g0 %s -o %t && %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// This test verifies that dynamic memory is correctly tracked.
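+// The shadow for the vector's heap buffer is only created when push_back
+// stores the double into the newly allocated memory, which is why the dump
+// below should show a fully typed d0-d7 shadow rather than __ bytes.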
+
+#include <cstddef>
+#include <vector>
+
+extern "C" void __nsan_dump_shadow_mem(const char *addr, size_t size_bytes,
+                                       size_t bytes_per_line, size_t reserved);
+
+int main() {
+  std::vector<double> values;
+  values.push_back(1.028);
+  __nsan_dump_shadow_mem((const char*)values.data(), 8, 8, 0);
+  // CHECK: 0x{{[a-f0-9]*}}: d0 d1 d2 d3 d4 d5 d6 d7 (1.02800000000000002487)
+}
diff --git a/compiler-rt/test/nsan/verificarlo_case4.cc b/compiler-rt/test/nsan/verificarlo_case4.cc
new file mode 100644
--- /dev/null
+++ b/compiler-rt/test/nsan/verificarlo_case4.cc
@@ -0,0 +1,28 @@
+// RUN: %clangxx_nsan -O2 -g %s -o %t && NSAN_OPTIONS=halt_on_error=1 not %run %t >%t.out 2>&1
+// RUN: FileCheck %s < %t.out
+
+// Case Study #4 from the Verificarlo paper: The loop alternates between
+// accumulating extremely large and extremely small values, leading to large
+// loss of precision.
+
+#include <cstdio>
+
+using FloatT = double;
+
+__attribute__((noinline)) FloatT Case4(FloatT c, int iterations) {
+  for (int i = 0; i < iterations; ++i) {
+    if (i % 2 == 0)
+      c = c + 1.e6;
+    else
+      c = c - 1.e-6;
+  }
+  return c;
+  // CHECK: #0 {{.*}} in Case4{{.*}}[[@LINE-1]]
+}
+
+int main() {
+  for (int iterations = 1; iterations <= 100000000; iterations *= 10) {
+    printf("%10i iterations: %f\n", iterations, Case4(-5.e13, iterations));
+  }
+  return 0;
+}
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -658,6 +658,7 @@
   ATTR_KIND_NO_CALLBACK = 71,
   ATTR_KIND_HOT = 72,
   ATTR_KIND_NO_PROFILE = 73,
+  ATTR_KIND_SANITIZE_NUMERICAL_STABILITY = 74,
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -230,6 +230,9 @@
 /// MemTagSanitizer is on.
 def SanitizeMemTag : EnumAttr<"sanitize_memtag">;
 
+/// NumericalStabilitySanitizer is on.
+def SanitizeNumericalStability : EnumAttr<"sanitize_numericalstability">;
+
 /// Speculative Load Hardening is enabled.
 ///
 /// Note that this uses the default compatibility (always compatible during
@@ -285,6 +288,7 @@
 def : CompatRule<"isEqual">;
 def : CompatRule<"isEqual">;
 def : CompatRule<"isEqual">;
+def : CompatRule<"isEqual<SanitizeNumericalStabilityAttr>">;
 def : CompatRule<"isEqual">;
 def : CompatRule<"isEqual">;
 def : CompatRule<"isEqual">;
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -432,6 +432,7 @@
 void initializeTargetPassConfigPass(PassRegistry&);
 void initializeTargetTransformInfoWrapperPassPass(PassRegistry&);
 void initializeThreadSanitizerLegacyPassPass(PassRegistry&);
+void initializeNumericalStabilitySanitizerLegacyPassPass(PassRegistry&);
 void initializeTwoAddressInstructionPassPass(PassRegistry&);
 void initializeTypeBasedAAWrapperPassPass(PassRegistry&);
 void initializeTypePromotionPass(PassRegistry&);
diff --git a/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h
@@ -0,0 +1,40 @@
+//===- NumericalStabilitySanitizer.h - NSan Pass ---------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the numerical stability sanitizer (nsan) pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABILITYSANITIZER_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABILITYSANITIZER_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+/// Inserts NumericalStabilitySanitizer instrumentation.
+FunctionPass *createNumericalStabilitySanitizerLegacyPassPass();
+
+/// A function pass for nsan instrumentation.
+///
+/// Instruments functions to duplicate floating point computations in a
+/// higher-precision type.
+/// This function pass inserts calls to runtime library functions. If the
+/// functions aren't declared yet, the pass inserts the declarations.
+struct NumericalStabilitySanitizerPass
+    : public PassInfoMixin<NumericalStabilitySanitizerPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+  static bool isRequired() { return true; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_NUMERICALSTABILITYSANITIZER_H
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -691,6 +691,7 @@
   KEYWORD(sanitize_address);
   KEYWORD(sanitize_hwaddress);
   KEYWORD(sanitize_memtag);
+  KEYWORD(sanitize_numericalstability);
   KEYWORD(sanitize_thread);
   KEYWORD(sanitize_memory);
   KEYWORD(speculative_load_hardening);
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1395,6 +1395,8 @@
   case lltok::kw_sanitize_hwaddress:
     B.addAttribute(Attribute::SanitizeHWAddress); break;
   case lltok::kw_sanitize_memtag:
    B.addAttribute(Attribute::SanitizeMemTag); break;
+  case lltok::kw_sanitize_numericalstability:
+    B.addAttribute(Attribute::SanitizeNumericalStability); break;
   case lltok::kw_sanitize_thread:
     B.addAttribute(Attribute::SanitizeThread); break;
   case lltok::kw_sanitize_memory:
@@ -1791,6 +1793,7 @@
   case lltok::kw_sanitize_address:
   case lltok::kw_sanitize_hwaddress:
   case lltok::kw_sanitize_memtag:
+  case lltok::kw_sanitize_numericalstability:
   case lltok::kw_sanitize_memory:
   case lltok::kw_sanitize_thread:
   case lltok::kw_speculative_load_hardening:
@@ -1900,6 +1903,7 @@
   case lltok::kw_sanitize_address:
   case lltok::kw_sanitize_hwaddress:
   case lltok::kw_sanitize_memtag:
+  case lltok::kw_sanitize_numericalstability:
   case lltok::kw_sanitize_memory:
   case lltok::kw_sanitize_thread:
   case lltok::kw_speculative_load_hardening:
diff --git a/llvm/lib/AsmParser/LLToken.h b/llvm/lib/AsmParser/LLToken.h
--- a/llvm/lib/AsmParser/LLToken.h
+++ b/llvm/lib/AsmParser/LLToken.h
@@ -182,6 +182,7 @@
   kw_sanitize_address,
   kw_sanitize_hwaddress,
   kw_sanitize_memtag,
+  kw_sanitize_numericalstability,
   kw_builtin,
   kw_byval,
   kw_inalloca,
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1546,6 +1546,8 @@
     return Attribute::MustProgress;
   case bitc::ATTR_KIND_HOT:
     return Attribute::Hot;
+  case bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY:
+    return Attribute::SanitizeNumericalStability;
   }
 }
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -754,6 +754,8 @@
     return bitc::ATTR_KIND_BYREF;
   case Attribute::MustProgress:
     return bitc::ATTR_KIND_MUSTPROGRESS;
+  case Attribute::SanitizeNumericalStability:
+    return bitc::ATTR_KIND_SANITIZE_NUMERICAL_STABILITY;
   case Attribute::EndAttrKinds:
     llvm_unreachable("Can not encode end-attribute kinds marker.");
   case Attribute::None:
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -337,6 +337,8 @@
     return "sanitize_hwaddress";
   if (hasAttribute(Attribute::SanitizeMemTag))
     return "sanitize_memtag";
+  if (hasAttribute(Attribute::SanitizeNumericalStability))
+    return "sanitize_numericalstability";
   if (hasAttribute(Attribute::AlwaysInline))
     return "alwaysinline";
   if (hasAttribute(Attribute::ArgMemOnly))
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1629,6 +1629,7 @@
   case Attribute::SanitizeMemTag:
   case Attribute::SanitizeThread:
   case Attribute::SanitizeMemory:
+  case Attribute::SanitizeNumericalStability:
   case Attribute::MinSize:
   case Attribute::NoDuplicate:
   case Attribute::Builtin:
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -131,6 +131,7 @@
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
 #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
 #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -114,6 +114,7 @@
 MODULE_PASS("dfsan", DataFlowSanitizerPass())
 MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
 MODULE_PASS("msan-module", MemorySanitizerPass({}))
+MODULE_PASS("nsan-module", NumericalStabilitySanitizerPass())
 MODULE_PASS("tsan-module", ThreadSanitizerPass())
 MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
@@ -328,6 +329,7 @@
 FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
 FUNCTION_PASS("msan", MemorySanitizerPass({}))
 FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
+FUNCTION_PASS("nsan", NumericalStabilitySanitizerPass())
 FUNCTION_PASS("tsan", ThreadSanitizerPass())
 FUNCTION_PASS("memprof", MemProfilerPass())
 #undef FUNCTION_PASS
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -65,6 +65,7 @@
       .Case("sanitize_address", Attribute::SanitizeAddress)
       .Case("sanitize_hwaddress", Attribute::SanitizeHWAddress)
       .Case("sanitize_memory", Attribute::SanitizeMemory)
+      .Case("sanitize_numericalstability", Attribute::SanitizeNumericalStability)
      .Case("sanitize_thread", Attribute::SanitizeThread)
      .Case("sanitize_memtag", Attribute::SanitizeMemTag)
      .Case("speculative_load_hardening", Attribute::SpeculativeLoadHardening)
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -7,6 +7,7 @@
   GCOVProfiling.cpp
   MemProfiler.cpp
   MemorySanitizer.cpp
+  NumericalStabilitySanitizer.cpp
   IndirectCallPromotion.cpp
   Instrumentation.cpp
   InstrOrderFile.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -108,6 +108,7 @@
   initializeMemorySanitizerLegacyPassPass(Registry);
   initializeHWAddressSanitizerLegacyPassPass(Registry);
   initializeThreadSanitizerLegacyPassPass(Registry);
+  initializeNumericalStabilitySanitizerLegacyPassPass(Registry);
   initializeModuleSanitizerCoverageLegacyPassPass(Registry);
   initializeDataFlowSanitizerLegacyPassPass(Registry);
 }
diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp
@@ -0,0 +1,2270 @@
+//===-- NumericalStabilitySanitizer.cpp -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of NumericalStabilitySanitizer.
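+//
+// In short, the pass duplicates every floating-point computation in a
+// higher-precision shadow type and emits calls into the nsan runtime to
+// compare the native and shadow results (a rough summary; see the pass
+// comment in NumericalStabilitySanitizer.h and the tests above for details).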
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+
+#include <cstdint>
+#include <unordered_map>
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nsan"
+
+STATISTIC(NumInstrumentedFTLoads,
+          "Number of instrumented floating-point loads");
+
+STATISTIC(NumInstrumentedFTCalls,
+          "Number of instrumented floating-point calls");
+STATISTIC(NumInstrumentedFTRets,
+          "Number of instrumented floating-point returns");
+STATISTIC(NumInstrumentedFTStores,
+          "Number of instrumented floating-point stores");
+STATISTIC(NumInstrumentedNonFTStores,
+          "Number of instrumented non floating-point stores");
+STATISTIC(
+    NumInstrumentedNonFTMemcpyStores,
+    "Number of instrumented non floating-point stores with memcpy semantics");
+STATISTIC(NumInstrumentedFCmp, "Number of instrumented fcmps");
+
+// Using smaller shadow types can help improve speed. For example, `dlq`
+// ranges from 3x slower to 5x faster than `dqq` in opt mode, and is 2-6x
+// faster in dbg mode.
+static cl::opt<std::string> ClShadowMapping(
+    "nsan-shadow-type-mapping", cl::init("dqq"),
+    cl::desc("One shadow type id for each of `float`, `double`, `long double`. "
+             "`d`,`l`,`q`,`e` mean double, x86_fp80, fp128 (quad) and "
+             "ppc_fp128 (extended double) respectively. The default is to "
+             "shadow `float` as `double`, and `double` and `x86_fp80` as "
+             "`fp128`"),
+    cl::Hidden);
+
+static cl::opt<bool>
+    ClInstrumentFCmp("nsan-instrument-fcmp", cl::init(true),
+                     cl::desc("Instrument floating-point comparisons"),
+                     cl::Hidden);
+
+static cl::opt<bool> ClTruncateFCmpEq(
+    "nsan-truncate-fcmp-eq", cl::init(true),
+    cl::desc(
+        "This flag controls the behaviour of fcmp equality comparisons: "
+        "for equality comparisons such as `x == 0.0f`, we can perform the "
+        "shadow check in the shadow domain (`(x_shadow == 0.0) == (x == 0.0f)`) "
+        "or in the app domain (`(trunc(x_shadow) == 0.0f) == (x == 0.0f)`). "
+        "This helps catch the case when `x_shadow` is accurate enough (and "
+        "therefore close enough to zero) that `trunc(x_shadow)` is zero even "
+        "though both `x` and `x_shadow` are not."),
+    cl::Hidden);
+
+// When there is external, uninstrumented code writing to memory, the shadow
+// memory can get out of sync with the application memory. Enabling this flag
+// emits consistency checks for loads to catch this situation.
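+// (A typical example would be a precompiled, uninstrumented math library
+// writing doubles into a buffer owned by instrumented code; see
+// compiler-rt/test/nsan/uninstrumented_write.cc above.)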
+// When everything is instrumented, this is not strictly necessary because any
+// load should have a corresponding store, but it can help debug cases when the
+// framework did a bad job at tracking shadow memory modifications by failing on
+// load rather than store.
+// FIXME: provide a way to resume computations from the FT value when the load
+// is inconsistent. This ensures that further computations are not polluted.
+static cl::opt<bool> ClCheckLoads("nsan-check-loads", cl::init(false),
+                                  cl::desc("Check floating-point loads"),
+                                  cl::Hidden);
+
+static const char *const kNsanModuleCtorName = "nsan.module_ctor";
+static const char *const kNsanInitName = "__nsan_init";
+
+// The following values must be kept in sync with the runtime.
+static constexpr const int kShadowScale = 2;
+static constexpr const int kMaxVectorWidth = 8;
+static constexpr const int kMaxNumArgs = 128;
+static constexpr const int kMaxShadowTypeSizeBytes = 16; // fp128
+
+namespace {
+
+// Defines the characteristics (type id, type, and floating-point semantics) of
+// each possible shadow type.
+class ShadowTypeConfig {
+public:
+  static std::unique_ptr<ShadowTypeConfig> fromNsanTypeId(char TypeId);
+
+  // The floating-point semantics of the shadow type.
+  virtual const fltSemantics &semantics() const = 0;
+
+  // The LLVM Type corresponding to the shadow type.
+  virtual Type *getType(LLVMContext &Context) const = 0;
+
+  // The nsan type id of the shadow type (`d`, `l`, `q`, ...).
+  virtual char getNsanTypeId() const = 0;
+
+  virtual ~ShadowTypeConfig() {}
+};
+
+template <char NsanTypeId>
+class ShadowTypeConfigImpl : public ShadowTypeConfig {
+public:
+  char getNsanTypeId() const override { return NsanTypeId; }
+  static constexpr const char kNsanTypeId = NsanTypeId;
+};
+
+// `double` (`d`) shadow type.
+class F64ShadowConfig : public ShadowTypeConfigImpl<'d'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::IEEEdouble();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getDoubleTy(Context);
+  }
+};
+
+// `x86_fp80` (`l`) shadow type: X86 long double.
+class F80ShadowConfig : public ShadowTypeConfigImpl<'l'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::x87DoubleExtended();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getX86_FP80Ty(Context);
+  }
+};
+
+// `fp128` (`q`) shadow type.
+class F128ShadowConfig : public ShadowTypeConfigImpl<'q'> {
+  const fltSemantics &semantics() const override { return APFloat::IEEEquad(); }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getFP128Ty(Context);
+  }
+};
+
+// `ppc_fp128` (`e`) shadow type: IBM extended double with 106 bits of mantissa.
+class PPC128ShadowConfig : public ShadowTypeConfigImpl<'e'> {
+  const fltSemantics &semantics() const override {
+    return APFloat::PPCDoubleDouble();
+  }
+  Type *getType(LLVMContext &Context) const override {
+    return Type::getPPC_FP128Ty(Context);
+  }
+};
+
+// Creates a ShadowTypeConfig given its type id.
+std::unique_ptr<ShadowTypeConfig>
+ShadowTypeConfig::fromNsanTypeId(const char TypeId) {
+  switch (TypeId) {
+  case F64ShadowConfig::kNsanTypeId:
+    return std::make_unique<F64ShadowConfig>();
+  case F80ShadowConfig::kNsanTypeId:
+    return std::make_unique<F80ShadowConfig>();
+  case F128ShadowConfig::kNsanTypeId:
+    return std::make_unique<F128ShadowConfig>();
+  case PPC128ShadowConfig::kNsanTypeId:
+    return std::make_unique<PPC128ShadowConfig>();
+  }
+  errs() << "nsan: invalid shadow type id '" << TypeId << "'\n";
+  return nullptr;
+}
+
+// An enum corresponding to shadow value types. Used as indices in arrays, so
+// not an `enum class`.
+enum FTValueType { kFloat, kDouble, kLongDouble, kNumValueTypes };
+
+static FTValueType semanticsToFTValueType(const fltSemantics &Sem) {
+  if (&Sem == &APFloat::IEEEsingle()) {
+    return kFloat;
+  } else if (&Sem == &APFloat::IEEEdouble()) {
+    return kDouble;
+  } else if (&Sem == &APFloat::x87DoubleExtended()) {
+    return kLongDouble;
+  }
+  llvm_unreachable("semantics are not one of the handled types");
+}
+
+// If `FT` corresponds to a primitive FTValueType, return it.
+static Optional<FTValueType> ftValueTypeFromType(Type *FT) {
+  if (FT->isFloatTy())
+    return kFloat;
+  if (FT->isDoubleTy())
+    return kDouble;
+  if (FT->isX86_FP80Ty())
+    return kLongDouble;
+  return {};
+}
+
+// Returns the LLVM type for an FTValueType.
+static Type *typeFromFTValueType(FTValueType VT, LLVMContext &Context) {
+  switch (VT) {
+  case kFloat:
+    return Type::getFloatTy(Context);
+  case kDouble:
+    return Type::getDoubleTy(Context);
+  case kLongDouble:
+    return Type::getX86_FP80Ty(Context);
+  case kNumValueTypes:
+    return nullptr;
+  }
+}
+
+// Returns the type name for an FTValueType.
+static const char *typeNameFromFTValueType(FTValueType VT) {
+  switch (VT) {
+  case kFloat:
+    return "float";
+  case kDouble:
+    return "double";
+  case kLongDouble:
+    return "longdouble";
+  case kNumValueTypes:
+    return nullptr;
+  }
+}
+
+// A specific mapping configuration of application type to shadow type for nsan
+// (see the -nsan-shadow-type-mapping flag).
+class MappingConfig {
+public:
+  bool initialize(LLVMContext *C) {
+    if (ClShadowMapping.size() != 3) {
+      errs() << "Invalid nsan mapping: " << ClShadowMapping << "\n";
+      return false;
+    }
+    Context = C;
+    unsigned ShadowTypeSizeBits[kNumValueTypes];
+    for (int VT = 0; VT < kNumValueTypes; ++VT) {
+      auto Config = ShadowTypeConfig::fromNsanTypeId(ClShadowMapping[VT]);
+      if (Config == nullptr)
+        return false;
+      const unsigned AppTypeSize =
+          typeFromFTValueType(static_cast<FTValueType>(VT), *C)
+              ->getScalarSizeInBits();
+      const unsigned ShadowTypeSize =
+          Config->getType(*C)->getScalarSizeInBits();
+      // Check that the shadow type size is at most kShadowScale times the
+      // application type size, so that shadow memory computations are valid.
+      if (ShadowTypeSize > kShadowScale * AppTypeSize) {
+        errs() << "Invalid nsan mapping f" << AppTypeSize << "->f"
+               << ShadowTypeSize << ": The shadow type size should be at most "
+               << kShadowScale << " times the application type size\n";
+        return false;
+      }
+      ShadowTypeSizeBits[VT] = ShadowTypeSize;
+      Configs[VT] = std::move(Config);
+    }
+
+    // Check that the mapping is monotonic. This is required because if one
+    // does an fpextend of `float->long double` in application code, nsan is
+    // going to do an fpextend of `shadow(float) -> shadow(long double)` in
+    // shadow code. This will fail in `qql` mode, since nsan would be
+    // fpextending `f128 -> x86_fp80`, which is invalid.
+    // FIXME: Relax this.
+    if (ShadowTypeSizeBits[kFloat] > ShadowTypeSizeBits[kDouble] ||
+        ShadowTypeSizeBits[kDouble] > ShadowTypeSizeBits[kLongDouble]) {
+      errs() << "Invalid nsan mapping: { float->f"
+             << ShadowTypeSizeBits[kFloat] << "; double->f"
+             << ShadowTypeSizeBits[kDouble] << "; long double->f"
+             << ShadowTypeSizeBits[kLongDouble] << " }\n";
+      return false;
+    }
+    return true;
+  }
+
+  const ShadowTypeConfig &byValueType(FTValueType VT) const {
+    assert(VT < FTValueType::kNumValueTypes && "invalid value type");
+    return *Configs[VT];
+  }
+
+  const ShadowTypeConfig &bySemantics(const fltSemantics &Sem) const {
+    return byValueType(semanticsToFTValueType(Sem));
+  }
+
+  // Returns the extended shadow type for a given application type.
+  Type *getExtendedFPType(Type *FT) const {
+    if (const auto VT = ftValueTypeFromType(FT))
+      return Configs[*VT]->getType(*Context);
+    if (FT->isVectorTy()) {
+      auto *VecTy = cast<VectorType>(FT);
+      Type *ExtendedScalar = getExtendedFPType(VecTy->getElementType());
+      return ExtendedScalar
+                 ? VectorType::get(ExtendedScalar, VecTy->getElementCount())
+                 : nullptr;
+    }
+    return nullptr;
+  }
+
+private:
+  LLVMContext *Context = nullptr;
+  std::unique_ptr<ShadowTypeConfig> Configs[FTValueType::kNumValueTypes];
+};
+
+// The memory extents of a type specify how many elements of a given
+// FTValueType need to be stored when storing this type.
+struct MemoryExtents {
+  FTValueType ValueType;
+  uint64_t NumElts;
+};
+static MemoryExtents getMemoryExtentsOrDie(Type *FT) {
+  if (const auto VT = ftValueTypeFromType(FT))
+    return {*VT, 1};
+  if (FT->isVectorTy()) {
+    auto *VecTy = cast<VectorType>(FT);
+    const auto ScalarExtents = getMemoryExtentsOrDie(VecTy->getElementType());
+    return {ScalarExtents.ValueType,
+            ScalarExtents.NumElts * VecTy->getElementCount().getFixedValue()};
+  }
+  llvm_unreachable("invalid value type");
+}
+
+// The location of a check. Passed as parameters to runtime checking
+// functions.
+class CheckLoc {
+public:
+  // Creates a location that references an application memory location.
+  static CheckLoc makeStore(Value *Address) {
+    CheckLoc Result(kStore);
+    Result.Address = Address;
+    return Result;
+  }
+  static CheckLoc makeLoad(Value *Address) {
+    CheckLoc Result(kLoad);
+    Result.Address = Address;
+    return Result;
+  }
+
+  // Creates a location that references an argument, given by id.
+  static CheckLoc makeArg(int ArgId) {
+    CheckLoc Result(kArg);
+    Result.ArgId = ArgId;
+    return Result;
+  }
+
+  // Creates a location that references the return value of a function.
+  static CheckLoc makeRet() { return CheckLoc(kRet); }
+
+  // Creates a location that references a vector insert.
+  static CheckLoc makeInsert() { return CheckLoc(kInsert); }
+
+  // Returns the CheckType of location this refers to, as an integer-typed
+  // LLVM IR value.
+  Value *getType(LLVMContext &C) const {
+    return ConstantInt::get(Type::getInt32Ty(C), static_cast<int>(CheckTy));
+  }
+
+  // Returns a CheckType-specific value representing details of the location
+  // (e.g. application address for loads or stores), as an `IntptrTy`-typed
+  // LLVM IR value.
+  Value *getValue(Type *IntptrTy, IRBuilder<> &Builder) const {
+    switch (CheckTy) {
+    case kUnknown:
+      llvm_unreachable("unknown type");
+    case kRet:
+    case kInsert:
+      return ConstantInt::get(IntptrTy, 0);
+    case kArg:
+      return ConstantInt::get(IntptrTy, ArgId);
+    case kLoad:
+    case kStore:
+      return Builder.CreatePtrToInt(Address, IntptrTy);
+    }
+  }
+
+private:
+  // Must be kept in sync with the runtime.
+  enum CheckType {
+    kUnknown = 0,
+    kRet,
+    kArg,
+    kLoad,
+    kStore,
+    kInsert,
+  };
+  explicit CheckLoc(CheckType CheckTy) : CheckTy(CheckTy) {}
+
+  const CheckType CheckTy;
+  Value *Address = nullptr;
+  int ArgId = -1;
+};
+
+// A map of LLVM IR values to shadow LLVM IR values.
+class ValueToShadowMap {
+public:
+  explicit ValueToShadowMap(MappingConfig *Config) : Config(Config) {}
+
+  // Sets the shadow value for a value. Asserts that the value does not
+  // already have a shadow.
+  void setShadow(Value *V, Value *Shadow) {
+    assert(V);
+    assert(Shadow);
+    const bool Inserted = Map.emplace(V, Shadow).second;
+#ifdef LLVM_ENABLE_DUMP
+    if (!Inserted) {
+      if (const auto *const I = dyn_cast<Instruction>(V))
+        I->getParent()->getParent()->dump();
+      errs() << "duplicate shadow (" << V << "): ";
+      V->dump();
+    }
+#endif
+    assert(Inserted && "duplicate shadow");
+    (void)Inserted;
+  }
+
+  // Returns true if the value already has a shadow (including if the value
+  // is a constant). If true, calling getShadow() is valid.
+  bool hasShadow(Value *V) const {
+    return isa<Constant>(V) || (Map.find(V) != Map.end());
+  }
+
+  // Returns the shadow value for a given value. Asserts that the value has
+  // a shadow value. Lazily creates shadows for constant values.
+  Value *getShadow(Value *V) const {
+    assert(V);
+    if (Constant *C = dyn_cast<Constant>(V))
+      return getShadowConstant(C);
+    const auto ShadowValIt = Map.find(V);
+    assert(ShadowValIt != Map.end() && "shadow val does not exist");
+    assert(ShadowValIt->second && "shadow val is null");
+    return ShadowValIt->second;
+  }
+
+  bool empty() const { return Map.empty(); }
+
+private:
+  // Extends a constant application value to its shadow counterpart.
+  APFloat extendConstantFP(APFloat CV) const {
+    bool LosesInfo = false;
+    CV.convert(Config->bySemantics(CV.getSemantics()).semantics(),
+               APFloatBase::rmTowardZero, &LosesInfo);
+    return CV;
+  }
+
+  // Returns the shadow constant for the given application constant.
+  Constant *getShadowConstant(Constant *C) const {
+    if (UndefValue *U = dyn_cast<UndefValue>(C)) {
+      return UndefValue::get(Config->getExtendedFPType(U->getType()));
+    }
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+      // Floating-point constants.
+      return ConstantFP::get(Config->getExtendedFPType(CFP->getType()),
+                             extendConstantFP(CFP->getValueAPF()));
+    }
+    // Vector, array, or aggregate constants.
+    if (C->getType()->isVectorTy()) {
+      SmallVector<Constant *, 8> Elements;
+      for (int I = 0, E = cast<VectorType>(C->getType())
+                              ->getElementCount()
+                              .getFixedValue();
+           I < E; ++I)
+        Elements.push_back(getShadowConstant(C->getAggregateElement(I)));
+      return ConstantVector::get(Elements);
+    }
+    llvm_unreachable("unimplemented");
+  }
+
+  MappingConfig *const Config;
+  std::unordered_map<Value *, Value *> Map;
+};
+
+/// Instantiating NumericalStabilitySanitizer inserts the nsan runtime library
+/// API function declarations into the module if they don't exist already.
+/// Instantiating ensures the __nsan_init function is in the list of global
+/// constructors for the module.
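+///
+/// Broadly, instrumentation proceeds in two phases (see sanitizeFunction):
+/// extended-precision shadow values are first created for every FT value
+/// defined by the function (createShadowArguments, maybeCreateShadowValue),
+/// then shadows are propagated to memory and consistency checks are emitted
+/// where values escape (propagateShadowValues): on returns, calls, stores,
+/// and fcmps.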
+class NumericalStabilitySanitizer {
+public:
+  bool sanitizeFunction(Function &F, const TargetLibraryInfo &TLI);
+
+private:
+  void initialize(Module &M);
+  bool instrumentMemIntrinsic(MemIntrinsic *MI);
+  void maybeAddSuffixForNsanInterface(CallBase *CI);
+  bool addrPointsToConstantData(Value *Addr);
+  void maybeCreateShadowValue(Instruction &Root, const TargetLibraryInfo &TLI,
+                              ValueToShadowMap &Map);
+  Value *createShadowValueWithOperandsAvailable(Instruction &Inst,
+                                                const TargetLibraryInfo &TLI,
+                                                const ValueToShadowMap &Map);
+  PHINode *maybeCreateShadowPhi(PHINode &Phi, const TargetLibraryInfo &TLI);
+  void createShadowArguments(Function &F, const TargetLibraryInfo &TLI,
+                             ValueToShadowMap &Map);
+
+  void populateShadowStack(CallBase &CI, const TargetLibraryInfo &TLI,
+                           const ValueToShadowMap &Map);
+
+  void propagateShadowValues(Instruction &Inst, const TargetLibraryInfo &TLI,
+                             const ValueToShadowMap &Map);
+  Value *emitCheck(Value *V, Value *ShadowV, IRBuilder<> &Builder,
+                   CheckLoc Loc);
+  Value *emitCheckInternal(Value *V, Value *ShadowV, IRBuilder<> &Builder,
+                           CheckLoc Loc);
+  void emitFCmpCheck(FCmpInst &FCmp, const ValueToShadowMap &Map);
+  Value *getCalleeAddress(CallBase &Call, IRBuilder<> &Builder) const;
+
+  // Value creation handlers.
+  Value *handleLoad(LoadInst &Load, Type *VT, Type *ExtendedVT);
+  Value *handleTrunc(FPTruncInst &Trunc, Type *VT, Type *ExtendedVT,
+                     const ValueToShadowMap &Map);
+  Value *handleExt(FPExtInst &Ext, Type *VT, Type *ExtendedVT,
+                   const ValueToShadowMap &Map);
+  Value *handleCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
+                        const TargetLibraryInfo &TLI,
+                        const ValueToShadowMap &Map, IRBuilder<> &Builder);
+  Value *maybeHandleKnownCallBase(CallBase &Call, Type *VT, Type *ExtendedVT,
+                                  const TargetLibraryInfo &TLI,
+                                  const ValueToShadowMap &Map,
+                                  IRBuilder<> &Builder);
+
+  // Value propagation handlers.
+  void propagateFTStore(StoreInst &Store, Type *VT, Type *ExtendedVT,
+                        const ValueToShadowMap &Map);
+  void propagateNonFTStore(StoreInst &Store, Type *VT,
+                           const ValueToShadowMap &Map);
+
+  MappingConfig Config;
+  LLVMContext *Context = nullptr;
+  IntegerType *IntptrTy = nullptr;
+  FunctionCallee NsanGetShadowPtrForStore[FTValueType::kNumValueTypes];
+  FunctionCallee NsanGetShadowPtrForLoad[FTValueType::kNumValueTypes];
+  FunctionCallee NsanCheckValue[FTValueType::kNumValueTypes];
+  FunctionCallee NsanFCmpFail[FTValueType::kNumValueTypes];
+  FunctionCallee NsanCopyValues;
+  FunctionCallee NsanSetValueUnknown;
+  FunctionCallee NsanGetRawShadowTypePtr;
+  FunctionCallee NsanGetRawShadowPtr;
+  GlobalValue *NsanShadowRetTag;
+  GlobalValue *NsanShadowRetPtr;
+  GlobalValue *NsanShadowArgsTag;
+  GlobalValue *NsanShadowArgsPtr;
+};
+
+struct NumericalStabilitySanitizerLegacyPass : FunctionPass {
+  NumericalStabilitySanitizerLegacyPass() : FunctionPass(ID) {}
+  StringRef getPassName() const override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+  bool runOnFunction(Function &F) override;
+  bool doInitialization(Module &M) override;
+  static char ID;
+
+private:
+  Optional<NumericalStabilitySanitizer> Nsan;
+};
+
+void insertModuleCtor(Module &M) {
+  getOrCreateSanitizerCtorAndInitFunctions(
+      M, kNsanModuleCtorName, kNsanInitName, /*InitArgTypes=*/{},
+      /*InitArgs=*/{},
+      // This callback is invoked when the functions are created the first
+      // time. Hook them into the global ctors list in that case:
+      [&](Function *Ctor, FunctionCallee) { appendToGlobalCtors(M, Ctor, 0); });
+}
+
+} // end anonymous namespace
+
+PreservedAnalyses
+NumericalStabilitySanitizerPass::run(Function &F,
+                                     FunctionAnalysisManager &FAM) {
+  NumericalStabilitySanitizer Nsan;
+  if (Nsan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F)))
+    return PreservedAnalyses::none();
+  return PreservedAnalyses::all();
+}
+
+PreservedAnalyses
+NumericalStabilitySanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) {
+  insertModuleCtor(M);
+  return PreservedAnalyses::none();
+}
+
+char NumericalStabilitySanitizerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(NumericalStabilitySanitizerLegacyPass, "nsan",
+                      "NumericalStabilitySanitizer: detects numerical errors.",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(NumericalStabilitySanitizerLegacyPass, "nsan",
+                    "NumericalStabilitySanitizer: detects numerical errors.",
+                    false, false)
+
+StringRef NumericalStabilitySanitizerLegacyPass::getPassName() const {
+  return "NumericalStabilitySanitizerLegacyPass";
+}
+
+void NumericalStabilitySanitizerLegacyPass::getAnalysisUsage(
+    AnalysisUsage &AU) const {
+  AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+bool NumericalStabilitySanitizerLegacyPass::doInitialization(Module &M) {
+  insertModuleCtor(M);
+  Nsan.emplace();
+  return true;
+}
+
+bool NumericalStabilitySanitizerLegacyPass::runOnFunction(Function &F) {
+  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  Nsan->sanitizeFunction(F, TLI);
+  return true;
+}
+
+FunctionPass *llvm::createNumericalStabilitySanitizerLegacyPassPass() {
+  return new NumericalStabilitySanitizerLegacyPass();
+}
+
+static GlobalValue *createThreadLocalGV(const char *Name, Module &M,
+                                        Type *Ty) {
+  return dyn_cast<GlobalValue>(M.getOrInsertGlobal(Name, Ty, [&M, Ty, Name] {
+    return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+                              nullptr, Name, nullptr,
+                              GlobalVariable::InitialExecTLSModel);
+  }));
+}
+
+void NumericalStabilitySanitizer::initialize(Module &M) {
+  const DataLayout &DL = M.getDataLayout();
+  Context = &M.getContext();
+  IntptrTy = DL.getIntPtrType(*Context);
+  Type *Int8PtrTy = Type::getInt8PtrTy(*Context);
+  Type *Int32Ty = Type::getInt32Ty(*Context);
+  Type *Int1Ty = Type::getInt1Ty(*Context);
+  Type *VoidTy = Type::getVoidTy(*Context);
+
+  AttributeList Attr;
+  Attr = Attr.addAttribute(*Context, AttributeList::FunctionIndex,
+                           Attribute::NoUnwind);
+  // Initialize the runtime values (functions and global variables).
+  for (int I = 0; I < kNumValueTypes; ++I) {
+    const FTValueType VT = static_cast<FTValueType>(I);
+    const char *const VTName = typeNameFromFTValueType(VT);
+    Type *const VTTy = typeFromFTValueType(VT, *Context);
+
+    // Load/store.
+    const std::string GetterPrefix =
+        std::string("__nsan_get_shadow_ptr_for_") + VTName;
+    NsanGetShadowPtrForStore[VT] = M.getOrInsertFunction(
+        GetterPrefix + "_store", Attr, Int8PtrTy, Int8PtrTy, IntptrTy);
+    NsanGetShadowPtrForLoad[VT] = M.getOrInsertFunction(
+        GetterPrefix + "_load", Attr, Int8PtrTy, Int8PtrTy, IntptrTy);
+
+    // Check.
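+    // For instance, when floats are shadowed by doubles (type id `d`), this
+    // declares `__nsan_internal_check_float_d`, whose i32 return value is a
+    // ContinuationType (see emitCheckInternal below).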
+    const auto &ShadowConfig = Config.byValueType(VT);
+    Type *ShadowTy = ShadowConfig.getType(*Context);
+    NsanCheckValue[VT] =
+        M.getOrInsertFunction(std::string("__nsan_internal_check_") + VTName +
+                                  "_" + ShadowConfig.getNsanTypeId(),
+                              Attr, Int32Ty, VTTy, ShadowTy, Int32Ty,
+                              IntptrTy);
+    NsanFCmpFail[VT] = M.getOrInsertFunction(
+        std::string("__nsan_fcmp_fail_") + VTName + "_" +
+            ShadowConfig.getNsanTypeId(),
+        Attr, VoidTy, VTTy, VTTy, ShadowTy, ShadowTy, Int32Ty, Int1Ty,
+        Int1Ty);
+  }
+
+  NsanCopyValues = M.getOrInsertFunction("__nsan_copy_values", Attr, VoidTy,
+                                         Int8PtrTy, Int8PtrTy, IntptrTy);
+  NsanSetValueUnknown = M.getOrInsertFunction("__nsan_set_value_unknown",
+                                              Attr, VoidTy, Int8PtrTy,
+                                              IntptrTy);
+
+  // FIXME: Add attributes nofree, nosync, readnone, readonly.
+  NsanGetRawShadowTypePtr = M.getOrInsertFunction(
+      "__nsan_internal_get_raw_shadow_type_ptr", Attr, Int8PtrTy, Int8PtrTy);
+  NsanGetRawShadowPtr = M.getOrInsertFunction(
+      "__nsan_internal_get_raw_shadow_ptr", Attr, Int8PtrTy, Int8PtrTy);
+
+  NsanShadowRetTag = createThreadLocalGV("__nsan_shadow_ret_tag", M, IntptrTy);
+  NsanShadowRetPtr = createThreadLocalGV(
+      "__nsan_shadow_ret_ptr", M,
+      ArrayType::get(Type::getInt8Ty(*Context),
+                     kMaxVectorWidth * kMaxShadowTypeSizeBytes));
+
+  NsanShadowArgsTag =
+      createThreadLocalGV("__nsan_shadow_args_tag", M, IntptrTy);
+  NsanShadowArgsPtr = createThreadLocalGV(
+      "__nsan_shadow_args_ptr", M,
+      ArrayType::get(Type::getInt8Ty(*Context),
+                     kMaxVectorWidth * kMaxNumArgs * kMaxShadowTypeSizeBytes));
+}
+
+// Returns true if the given LLVM Value points to constant data (typically, a
+// global variable reference).
+bool NumericalStabilitySanitizer::addrPointsToConstantData(Value *Addr) {
+  // If this is a GEP, just analyze its pointer operand.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Addr))
+    Addr = GEP->getPointerOperand();
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
+    return GV->isConstant();
+  }
+  return false;
+}
+
+// This instruments the function entry to create shadow arguments.
+// Pseudocode:
+//   if (this_fn_ptr == __nsan_shadow_args_tag) {
+//     s(arg0) = LOAD(__nsan_shadow_args);
+//     s(arg1) = LOAD(__nsan_shadow_args + sizeof(arg0));
+//     ...
+//     __nsan_shadow_args_tag = 0;
+//   } else {
+//     s(arg0) = fext(arg0);
+//     s(arg1) = fext(arg1);
+//     ...
+//   }
+void NumericalStabilitySanitizer::createShadowArguments(
+    Function &F, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
+  assert(!F.getIntrinsicID() && "found a definition of an intrinsic");
+
+  // Do not bother if there are no FP args.
+  if (all_of(F.args(), [this](const Argument &Arg) {
+        return Config.getExtendedFPType(Arg.getType()) == nullptr;
+      }))
+    return;
+
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHI());
+  // The function has shadow args if the shadow args tag matches the function
+  // address.
+  Value *HasShadowArgs = Builder.CreateICmpEQ(
+      Builder.CreateLoad(IntptrTy, NsanShadowArgsTag, /*isVolatile=*/false),
+      Builder.CreatePtrToInt(&F, IntptrTy));
+
+  unsigned ShadowArgsOffsetBytes = 0;
+  for (Argument &Arg : F.args()) {
+    Type *const VT = Arg.getType();
+    Type *const ExtendedVT = Config.getExtendedFPType(VT);
+    if (ExtendedVT == nullptr)
+      continue; // Not an FT value.
+    Value *Shadow = Builder.CreateSelect(
+        HasShadowArgs,
+        Builder.CreateAlignedLoad(
+            Builder.CreatePointerCast(
+                Builder.CreateConstGEP2_64(NsanShadowArgsPtr, 0,
+                                           ShadowArgsOffsetBytes),
+                ExtendedVT->getPointerTo()),
+            Align(1), /*isVolatile=*/false),
+        Builder.CreateCast(Instruction::FPExt, &Arg, ExtendedVT));
+    Map.setShadow(&Arg, Shadow);
+    TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
+    assert(!SlotSize.isScalable() && "unsupported");
+    ShadowArgsOffsetBytes += SlotSize.getFixedSize();
+  }
+  Builder.CreateStore(ConstantInt::get(IntptrTy, 0), NsanShadowArgsTag);
+}
+
+// Returns true if the instrumentation should emit code to check arguments
+// before a function call.
+static bool shouldCheckArgs(CallBase &CI, const TargetLibraryInfo &TLI) {
+  Function *Fn = CI.getCalledFunction();
+  if (Fn == nullptr)
+    return true; // Always check args of indirect calls.
+
+  // Never check nsan functions, the user called them for a reason.
+  if (Fn->getName().startswith("__nsan_"))
+    return false;
+
+  const auto ID = Fn->getIntrinsicID();
+  LibFunc LFunc = LibFunc::NumLibFuncs;
+  // Always check args of unknown functions.
+  if (ID == Intrinsic::ID() && !TLI.getLibFunc(*Fn, LFunc))
+    return true;
+
+  // Do not check args of an `fabs` call that is used for a comparison.
+  // This is typically used for `fabs(a-b) < tolerance`, where what matters
+  // is the result of the comparison, which is already caught by the fcmp
+  // checks.
+  if (ID == Intrinsic::fabs || LFunc == LibFunc_fabsf ||
+      LFunc == LibFunc_fabs || LFunc == LibFunc_fabsl)
+    for (const auto &U : CI.users())
+      if (isa<CmpInst>(U))
+        return false;
+
+  return true; // Default is check.
+}
+
+// Populates the shadow call stack (which contains shadow values for every
+// floating-point parameter to the function).
+void NumericalStabilitySanitizer::populateShadowStack(
+    CallBase &CI, const TargetLibraryInfo &TLI, const ValueToShadowMap &Map) {
+  // Do not create a shadow stack for inline asm.
+  if (CI.isInlineAsm())
+    return;
+
+  // Do not bother if there are no FP args.
+  if (all_of(CI.operands(), [this](const Value *Arg) {
+        return Config.getExtendedFPType(Arg->getType()) == nullptr;
+      }))
+    return;
+
+  IRBuilder<> Builder(&CI);
+  SmallVector<Value *, 8> ArgShadows;
+  const bool ShouldCheckArgs = shouldCheckArgs(CI, TLI);
+  int ArgId = -1;
+  for (Value *Arg : CI.operands()) {
+    ++ArgId;
+    if (Config.getExtendedFPType(Arg->getType()) == nullptr)
+      continue; // Not an FT value.
+    Value *ArgShadow = Map.getShadow(Arg);
+    ArgShadows.push_back(ShouldCheckArgs
+                             ? emitCheck(Arg, ArgShadow, Builder,
+                                         CheckLoc::makeArg(ArgId))
+                             : ArgShadow);
+  }
+
+  // Do not create shadow stacks for intrinsics/known lib funcs.
+  if (Function *Fn = CI.getCalledFunction()) {
+    LibFunc LFunc;
+    if (Fn->getIntrinsicID() || TLI.getLibFunc(*Fn, LFunc))
+      return;
+  }
+
+  const DataLayout &DL =
+      CI.getParent()->getParent()->getParent()->getDataLayout();
+  // Set the shadow stack tag.
+  Builder.CreateStore(getCalleeAddress(CI, Builder), NsanShadowArgsTag);
+  unsigned ShadowArgsOffsetBytes = 0;
+
+  unsigned ShadowArgId = 0;
+  for (const Value *Arg : CI.operands()) {
+    Type *const VT = Arg->getType();
+    Type *const ExtendedVT = Config.getExtendedFPType(VT);
+    if (ExtendedVT == nullptr)
+      continue; // Not an FT value.
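+    // Store the (checked) shadow for this argument into its slot in
+    // __nsan_shadow_args_ptr; createShadowArguments() in the callee reads it
+    // back.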
+    Builder.CreateAlignedStore(
+        ArgShadows[ShadowArgId++],
+        Builder.CreatePointerCast(
+            Builder.CreateConstGEP2_64(NsanShadowArgsPtr, 0,
+                                       ShadowArgsOffsetBytes),
+            ExtendedVT->getPointerTo()),
+        Align(1), /*isVolatile=*/false);
+    TypeSize SlotSize = DL.getTypeStoreSize(ExtendedVT);
+    assert(!SlotSize.isScalable() && "unsupported");
+    ShadowArgsOffsetBytes += SlotSize.getFixedSize();
+  }
+}
+
+// Internal part of emitCheck(). Returns a value that indicates whether
+// computation should continue with the shadow or resume by re-fextending the
+// value.
+enum ContinuationType { // Keep in sync with runtime.
+  kContinueWithShadow = 0,
+  kResumeFromValue = 1,
+};
+Value *NumericalStabilitySanitizer::emitCheckInternal(Value *V,
+                                                      Value *ShadowV,
+                                                      IRBuilder<> &Builder,
+                                                      CheckLoc Loc) {
+  // Do not emit checks for constant values, this is redundant.
+  if (isa<Constant>(V))
+    return ConstantInt::get(Builder.getInt32Ty(), kContinueWithShadow);
+
+  Type *const Ty = V->getType();
+  if (const auto VT = ftValueTypeFromType(Ty))
+    return Builder.CreateCall(
+        NsanCheckValue[*VT],
+        {V, ShadowV, Loc.getType(*Context), Loc.getValue(IntptrTy, Builder)});
+
+  if (Ty->isVectorTy()) {
+    auto *VecTy = cast<VectorType>(Ty);
+    Value *CheckResult = nullptr;
+    for (int I = 0, E = VecTy->getElementCount().getFixedValue(); I < E;
+         ++I) {
+      // We resume if any element resumes. Another option would be to create
+      // a vector shuffle with the array of ContinueWithShadow, but that is
+      // too complex.
+      Value *ComponentCheckResult = emitCheckInternal(
+          Builder.CreateExtractElement(V, I),
+          Builder.CreateExtractElement(ShadowV, I), Builder, Loc);
+      CheckResult = CheckResult
+                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
+                        : ComponentCheckResult;
+    }
+    return CheckResult;
+  }
+  if (Ty->isArrayTy()) {
+    Value *CheckResult = nullptr;
+    for (int I = 0, E = Ty->getArrayNumElements(); I < E; ++I) {
+      Value *ComponentCheckResult = emitCheckInternal(
+          Builder.CreateExtractValue(V, I),
+          Builder.CreateExtractValue(ShadowV, I), Builder, Loc);
+      CheckResult = CheckResult
+                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
+                        : ComponentCheckResult;
+    }
+    return CheckResult;
+  }
+  if (Ty->isStructTy()) {
+    Value *CheckResult = nullptr;
+    for (int I = 0, E = Ty->getStructNumElements(); I < E; ++I) {
+      if (Config.getExtendedFPType(Ty->getStructElementType(I)) == nullptr)
+        continue; // Only check FT values.
+      Value *ComponentCheckResult = emitCheckInternal(
+          Builder.CreateExtractValue(V, I),
+          Builder.CreateExtractValue(ShadowV, I), Builder, Loc);
+      CheckResult = CheckResult
+                        ? Builder.CreateOr(CheckResult, ComponentCheckResult)
+                        : ComponentCheckResult;
+    }
+    assert(CheckResult && "struct with no FT element");
+    return CheckResult;
+  }
+
+  llvm_unreachable("not implemented");
+}
+
+// Inserts a runtime check of V against its shadow value ShadowV.
+// We check values whenever they escape: on return, call, stores, and
+// insertvalue.
+// Returns the shadow value that should be used to continue the computations,
+// depending on the answer from the runtime.
+// FIXME: Should we check on select ? phi ?
+Value *NumericalStabilitySanitizer::emitCheck(Value *V, Value *ShadowV,
+                                              IRBuilder<> &Builder,
+                                              CheckLoc Loc) {
+  // Do not emit checks for constant values, this is redundant.
+  if (isa<Constant>(V))
+    return ShadowV;
+
+  Value *CheckResult = emitCheckInternal(V, ShadowV, Builder, Loc);
+  return Builder.CreateSelect(
+      Builder.CreateICmpEQ(CheckResult,
+                           ConstantInt::get(Builder.getInt32Ty(),
+                                            kResumeFromValue)),
+      Builder.CreateCast(Instruction::FPExt, V,
+                         Config.getExtendedFPType(V->getType())),
+      ShadowV);
+}
+
+static Instruction *getNextInstructionOrDie(Instruction &Inst) {
+  assert(Inst.getNextNode() && "instruction is a terminator");
+  return Inst.getNextNode();
+}
+
+// Inserts a check that fcmps on shadow values are consistent with fcmps on
+// base values.
+void NumericalStabilitySanitizer::emitFCmpCheck(FCmpInst &FCmp,
+                                                const ValueToShadowMap &Map) {
+  if (!ClInstrumentFCmp)
+    return;
+  Value *LHS = FCmp.getOperand(0);
+  if (Config.getExtendedFPType(LHS->getType()) == nullptr)
+    return;
+  Value *RHS = FCmp.getOperand(1);
+
+  // Split the basic block. On mismatch, we'll jump to the new basic block
+  // with a call to the runtime for error reporting.
+  BasicBlock *FCmpBB = FCmp.getParent();
+  BasicBlock *NextBB = FCmpBB->splitBasicBlock(getNextInstructionOrDie(FCmp));
+  // Remove the newly created terminator unconditional branch.
+  FCmpBB->getInstList().erase(FCmpBB->back());
+  BasicBlock *FailBB =
+      BasicBlock::Create(*Context, "", FCmpBB->getParent(), NextBB);
+
+  // Create the shadow fcmp and comparison between the fcmps.
+  IRBuilder<> FCmpBuilder(FCmpBB);
+  FCmpBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
+  Value *ShadowLHS = Map.getShadow(LHS);
+  Value *ShadowRHS = Map.getShadow(RHS);
+  // See comment on ClTruncateFCmpEq.
+  if (FCmp.isEquality() && ClTruncateFCmpEq) {
+    Type *Ty = ShadowLHS->getType();
+    ShadowLHS = FCmpBuilder.CreateCast(
+        Instruction::FPExt,
+        FCmpBuilder.CreateCast(Instruction::FPTrunc, ShadowLHS,
+                               LHS->getType()),
+        Ty);
+    ShadowRHS = FCmpBuilder.CreateCast(
+        Instruction::FPExt,
+        FCmpBuilder.CreateCast(Instruction::FPTrunc, ShadowRHS,
+                               RHS->getType()),
+        Ty);
+  }
+  Value *ShadowFCmp =
+      FCmpBuilder.CreateFCmp(FCmp.getPredicate(), ShadowLHS, ShadowRHS);
+  Value *OriginalAndShadowFcmpMatch =
+      FCmpBuilder.CreateICmpEQ(&FCmp, ShadowFCmp);
+
+  if (OriginalAndShadowFcmpMatch->getType()->isVectorTy()) {
+    // If we have a vector type, `OriginalAndShadowFcmpMatch` is a vector of
+    // i1, where an element is true if the corresponding elements in original
+    // and shadow are the same. We want all elements to be 1.
+    OriginalAndShadowFcmpMatch =
+        FCmpBuilder.CreateAndReduce(OriginalAndShadowFcmpMatch);
+  }
+
+  FCmpBuilder.CreateCondBr(OriginalAndShadowFcmpMatch, NextBB, FailBB);
+
+  // Fill in FailBB.
+  IRBuilder<> FailBuilder(FailBB);
+  FailBuilder.SetCurrentDebugLocation(FCmp.getDebugLoc());
+
+  const auto EmitFailCall = [this, &FCmp, &FCmpBuilder,
+                             &FailBuilder](Value *L, Value *R, Value *ShadowL,
+                                           Value *ShadowR, Value *Result,
+                                           Value *ShadowResult) {
+    Type *FT = L->getType();
+    FunctionCallee *Callee = nullptr;
+    if (FT->isFloatTy()) {
+      Callee = &(NsanFCmpFail[kFloat]);
+    } else if (FT->isDoubleTy()) {
+      Callee = &(NsanFCmpFail[kDouble]);
+    } else if (FT->isX86_FP80Ty()) {
+      // FIXME: make NsanFCmpFailLongDouble work.
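+      // In the meantime, truncate both operands to double and report through
+      // the double variant of the callback.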
+      Callee = &(NsanFCmpFail[kDouble]);
+      L = FailBuilder.CreateCast(Instruction::FPTrunc, L,
+                                 Type::getDoubleTy(*Context));
+      R = FailBuilder.CreateCast(Instruction::FPTrunc, R,
+                                 Type::getDoubleTy(*Context));
+    } else {
+      llvm_unreachable("not implemented");
+    }
+    FailBuilder.CreateCall(*Callee,
+                           {L, R, ShadowL, ShadowR,
+                            ConstantInt::get(FCmpBuilder.getInt32Ty(),
+                                             FCmp.getPredicate()),
+                            Result, ShadowResult});
+  };
+  if (LHS->getType()->isVectorTy()) {
+    for (int I = 0, E = cast<VectorType>(LHS->getType())
+                            ->getElementCount()
+                            .getFixedValue();
+         I < E; ++I) {
+      EmitFailCall(FailBuilder.CreateExtractElement(LHS, I),
+                   FailBuilder.CreateExtractElement(RHS, I),
+                   FailBuilder.CreateExtractElement(ShadowLHS, I),
+                   FailBuilder.CreateExtractElement(ShadowRHS, I),
+                   FailBuilder.CreateExtractElement(&FCmp, I),
+                   FailBuilder.CreateExtractElement(ShadowFCmp, I));
+    }
+  } else {
+    EmitFailCall(LHS, RHS, ShadowLHS, ShadowRHS, &FCmp, ShadowFCmp);
+  }
+  FailBuilder.CreateBr(NextBB);
+
+  ++NumInstrumentedFCmp;
+}
+
+// Creates a shadow phi value for any phi that defines a value of FT type.
+PHINode *NumericalStabilitySanitizer::maybeCreateShadowPhi(
+    PHINode &Phi, const TargetLibraryInfo &TLI) {
+  Type *const VT = Phi.getType();
+  Type *const ExtendedVT = Config.getExtendedFPType(VT);
+  if (ExtendedVT == nullptr)
+    return nullptr; // Not an FT value.
+  // The phi operands are shadow values and are not available when the phi is
+  // created. They will be populated in a final phase, once all shadow values
+  // have been created.
+  PHINode *Shadow = PHINode::Create(ExtendedVT, Phi.getNumIncomingValues());
+  Shadow->insertAfter(&Phi);
+  return Shadow;
+}
+
+Value *NumericalStabilitySanitizer::handleLoad(LoadInst &Load, Type *VT,
+                                               Type *ExtendedVT) {
+  IRBuilder<> Builder(getNextInstructionOrDie(Load));
+  Builder.SetCurrentDebugLocation(Load.getDebugLoc());
+  if (addrPointsToConstantData(Load.getPointerOperand())) {
+    // No need to look into the shadow memory, the value is a constant. Just
+    // convert from FT to 2FT.
+    return Builder.CreateCast(Instruction::FPExt, &Load, ExtendedVT);
+  }
+
+  //   if (%shadowptr == null)
+  //     %shadow = fpext %v
+  //   else
+  //     %shadow = load (ptrcast %shadowptr)
+  // Considered options here:
+  //  - Have `NsanGetShadowPtrForLoad` return a fixed address
+  //    &__nsan_unknown_value_shadow_address that is valid to load from, and
+  //    use a select. This has the advantage that the generated IR is
+  //    simpler.
+  //  - Have `NsanGetShadowPtrForLoad` return nullptr. Because `select` does
+  //    not short-circuit, dereferencing the returned pointer is no longer an
+  //    option; we have to split and create a separate basic block. This has
+  //    the advantage of being easier to debug because it crashes if we ever
+  //    mess up.
+
+  const auto Extents = getMemoryExtentsOrDie(VT);
+  Value *ShadowPtr =
+      Builder.CreateCall(NsanGetShadowPtrForLoad[Extents.ValueType],
+                         {Builder.CreatePointerCast(Load.getPointerOperand(),
+                                                    Builder.getInt8PtrTy()),
+                          ConstantInt::get(IntptrTy, Extents.NumElts)});
+  ++NumInstrumentedFTLoads;
+
+#if 0
+  // Emit a select.
+  return Builder.CreateSelect(
+      Builder.CreateICmpEq(ShadowPtr, NsanUnknownValueShadowAddress),
+      Builder.CreateCast(Instruction::FPExt, &Load, ExtendedVT),
+      Builder.CreateAlignedLoad(
+          Builder.CreatePointerCast(ShadowPtr, ExtendedVT->getPointerTo()),
+          Align(1), Load.isVolatile()));
+#else
+  // Split the basic block.
+  BasicBlock *LoadBB = Load.getParent();
+  BasicBlock *NextBB = LoadBB->splitBasicBlock(Builder.GetInsertPoint());
+  // Create the two options for creating the shadow value.
+  BasicBlock *ShadowLoadBB =
+      BasicBlock::Create(*Context, "", LoadBB->getParent(), NextBB);
+  BasicBlock *FExtBB =
+      BasicBlock::Create(*Context, "", LoadBB->getParent(), NextBB);
+
+  // Replace the newly created terminator unconditional branch by a
+  // conditional branch to one of the options.
+  {
+    LoadBB->getInstList().erase(LoadBB->back());
+    IRBuilder<> LoadBBBuilder(LoadBB); // The old builder has been invalidated.
+    LoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+    LoadBBBuilder.CreateCondBr(LoadBBBuilder.CreateIsNull(ShadowPtr), FExtBB,
+                               ShadowLoadBB);
+  }
+
+  // Fill in ShadowLoadBB.
+  IRBuilder<> ShadowLoadBBBuilder(ShadowLoadBB);
+  ShadowLoadBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+  Value *ShadowLoad = ShadowLoadBBBuilder.CreateAlignedLoad(
+      ShadowLoadBBBuilder.CreatePointerCast(ShadowPtr,
+                                            ExtendedVT->getPointerTo()),
+      Align(1), Load.isVolatile());
+  if (ClCheckLoads) {
+    ShadowLoad = emitCheck(&Load, ShadowLoad, ShadowLoadBBBuilder,
+                           CheckLoc::makeLoad(Load.getPointerOperand()));
+  }
+  ShadowLoadBBBuilder.CreateBr(NextBB);
+
+  // Fill in FExtBB.
+  IRBuilder<> FExtBBBuilder(FExtBB);
+  FExtBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+  Value *const FExt =
+      FExtBBBuilder.CreateCast(Instruction::FPExt, &Load, ExtendedVT);
+  FExtBBBuilder.CreateBr(NextBB);
+
+  // The shadow value can come from either of the options.
+  IRBuilder<> NextBBBuilder(&*NextBB->begin());
+  NextBBBuilder.SetCurrentDebugLocation(Load.getDebugLoc());
+  PHINode *ShadowPhi = NextBBBuilder.CreatePHI(ExtendedVT, 2);
+  ShadowPhi->addIncoming(ShadowLoad, ShadowLoadBB);
+  ShadowPhi->addIncoming(FExt, FExtBB);
+  return ShadowPhi;
+#endif
+}
+
+Value *NumericalStabilitySanitizer::handleTrunc(FPTruncInst &Trunc, Type *VT,
+                                                Type *ExtendedVT,
+                                                const ValueToShadowMap &Map) {
+  Value *const OrigSource = Trunc.getOperand(0);
+  Type *const OrigSourceTy = OrigSource->getType();
+  Type *const ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
+
+  // When truncating:
+  //  - (A) If the source has a shadow, we truncate from the shadow, else we
+  //    truncate from the original source.
+  //  - (B) If the shadow of the source is larger than the shadow of the
+  //    dest, we still need a truncate. Else, the shadow of the source is the
+  //    same type as the shadow of the dest (because mappings are
+  //    non-decreasing), so we don't need to emit a truncate.
+  // Examples,
+  //  with a mapping of {f32->f64;f64->f80;f80->f128}
+  //    fptrunc double %1 to float    ->  fptrunc x86_fp80 s(%1) to double
+  //    fptrunc x86_fp80 %1 to float  ->  fptrunc fp128 s(%1) to double
+  //    fptrunc fp128 %1 to float     ->  fptrunc fp128 %1 to double
+  //    fptrunc x86_fp80 %1 to double ->  fptrunc fp128 s(%1) to x86_fp80
+  //    fptrunc fp128 %1 to double    ->  fptrunc fp128 %1 to x86_fp80
+  //    fptrunc fp128 %1 to x86_fp80  ->  fp128 %1
+  //  with a mapping of {f32->f64;f64->f128;f80->f128}
+  //    fptrunc double %1 to float    ->  fptrunc fp128 s(%1) to double
+  //    fptrunc x86_fp80 %1 to float  ->  fptrunc fp128 s(%1) to double
+  //    fptrunc fp128 %1 to float     ->  fptrunc fp128 %1 to double
+  //    fptrunc x86_fp80 %1 to double ->  fp128 s(%1)
+  //    fptrunc fp128 %1 to double    ->  fp128 %1
+  //    fptrunc fp128 %1 to x86_fp80  ->  fp128 %1
+  //  with a mapping of {f32->f32;f64->f32;f80->f64}
+  //    fptrunc double %1 to float    ->  float s(%1)
+  //    fptrunc x86_fp80 %1 to float  ->  fptrunc double s(%1) to float
+  //    fptrunc fp128 %1 to float     ->  fptrunc fp128 %1 to float
+  //    fptrunc x86_fp80 %1 to double ->  fptrunc double s(%1) to float
+  //    fptrunc fp128 %1 to double    ->  fptrunc fp128 %1 to float
+  //    fptrunc fp128 %1 to x86_fp80  ->  fptrunc fp128 %1 to double
+
+  // See (A) above.
+  Value *const Source =
+      ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
+  Type *const SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
+  // See (B) above.
+  if (SourceTy == ExtendedVT)
+    return Source;
+
+  Instruction *const Shadow =
+      CastInst::Create(Instruction::FPTrunc, Source, ExtendedVT);
+  Shadow->insertAfter(&Trunc);
+  return Shadow;
+}
+
+Value *NumericalStabilitySanitizer::handleExt(FPExtInst &Ext, Type *VT,
+                                              Type *ExtendedVT,
+                                              const ValueToShadowMap &Map) {
+  Value *const OrigSource = Ext.getOperand(0);
+  Type *const OrigSourceTy = OrigSource->getType();
+  Type *const ExtendedSourceTy = Config.getExtendedFPType(OrigSourceTy);
+  // When extending:
+  //  - (A) If the source has a shadow, we extend from the shadow, else we
+  //    extend from the original source.
+  //  - (B) If the shadow of the dest is larger than the shadow of the
+  //    source, we still need an extend. Else, the shadow of the source is
+  //    the same type as the shadow of the dest (because mappings are
+  //    non-decreasing), so we don't need to emit an extend.
+  // Examples,
+  //  with a mapping of {f32->f64;f64->f80;f80->f128}
+  //    fpext half %1 to float      ->  fpext half %1 to double
+  //    fpext half %1 to double     ->  fpext half %1 to x86_fp80
+  //    fpext half %1 to x86_fp80   ->  fpext half %1 to fp128
+  //    fpext float %1 to double    ->  fpext double s(%1) to x86_fp80
+  //    fpext float %1 to x86_fp80  ->  fpext double s(%1) to fp128
+  //    fpext double %1 to x86_fp80 ->  fpext x86_fp80 s(%1) to fp128
+  //  with a mapping of {f32->f64;f64->f128;f80->f128}
+  //    fpext half %1 to float      ->  fpext half %1 to double
+  //    fpext half %1 to double     ->  fpext half %1 to fp128
+  //    fpext half %1 to x86_fp80   ->  fpext half %1 to fp128
+  //    fpext float %1 to double    ->  fpext double s(%1) to fp128
+  //    fpext float %1 to x86_fp80  ->  fpext double s(%1) to fp128
+  //    fpext double %1 to x86_fp80 ->  fp128 s(%1)
+  //  with a mapping of {f32->f32;f64->f32;f80->f64}
+  //    fpext half %1 to float      ->  fpext half %1 to float
+  //    fpext half %1 to double     ->  fpext half %1 to float
+  //    fpext half %1 to x86_fp80   ->  fpext half %1 to double
+  //    fpext float %1 to double    ->  float s(%1)
+  //    fpext float %1 to x86_fp80  ->  fpext float s(%1) to double
+  //    fpext double %1 to x86_fp80 ->  fpext float s(%1) to double
+
+  // See (A) above.
+  Value *const Source =
+      ExtendedSourceTy ? Map.getShadow(OrigSource) : OrigSource;
+  Type *const SourceTy = ExtendedSourceTy ? ExtendedSourceTy : OrigSourceTy;
+  // See (B) above.
+  if (SourceTy == ExtendedVT)
+    return Source;
+
+  Instruction *const Shadow =
+      CastInst::Create(Instruction::FPExt, Source, ExtendedVT);
+  Shadow->insertAfter(&Ext);
+  return Shadow;
+}
+
+// Returns a value with the address of the callee.
+Value *
+NumericalStabilitySanitizer::getCalleeAddress(CallBase &Call,
+                                              IRBuilder<> &Builder) const {
+  if (Function *Fn = Call.getCalledFunction()) {
+    // We're calling a statically known function.
+    return Builder.CreatePtrToInt(Fn, IntptrTy);
+  } else {
+    // We're calling a function through a function pointer.
+    return Builder.CreatePtrToInt(Call.getCalledOperand(), IntptrTy);
+  }
+}
+
+namespace {
+
+// FIXME: This should be tablegen-ed.
+struct KnownIntrinsic {
+  struct WidenedIntrinsic {
+    const char *NarrowName;
+    Intrinsic::ID ID; // wide id.
+    using FnTypeFactory = FunctionType *(*)(LLVMContext &);
+    FnTypeFactory MakeFnTy;
+  };
+
+  static const char *get(LibFunc LFunc);
+
+  // Given an intrinsic with an `FT` argument, try to find a wider intrinsic
+  // that applies the same operation on the shadow argument.
+  // Options are:
+  //  - pass in the ID and full function type,
+  //  - pass in the name, which includes the function type through mangling.
+  static const WidenedIntrinsic *widen(StringRef Name);
+
+private:
+  struct LFEntry {
+    LibFunc LFunc;
+    const char *IntrinsicName;
+  };
+  static const LFEntry kLibfuncIntrinsics[];
+
+  static const WidenedIntrinsic kWidenedIntrinsics[];
+};
+
+FunctionType *Make_Double_Double(LLVMContext &C) {
+  return FunctionType::get(Type::getDoubleTy(C), {Type::getDoubleTy(C)},
+                           false);
+}
+
+FunctionType *Make_X86FP80_X86FP80(LLVMContext &C) {
+  return FunctionType::get(Type::getX86_FP80Ty(C), {Type::getX86_FP80Ty(C)},
+                           false);
+}
+
+FunctionType *Make_Double_DoubleI32(LLVMContext &C) {
+  return FunctionType::get(Type::getDoubleTy(C),
+                           {Type::getDoubleTy(C), Type::getInt32Ty(C)},
+                           false);
+}
+
+FunctionType *Make_X86FP80_X86FP80I32(LLVMContext &C) {
+  return FunctionType::get(Type::getX86_FP80Ty(C),
+                           {Type::getX86_FP80Ty(C), Type::getInt32Ty(C)},
+                           false);
+}
+
+FunctionType *Make_Double_DoubleDouble(LLVMContext &C) {
+  return FunctionType::get(Type::getDoubleTy(C),
+                           {Type::getDoubleTy(C), Type::getDoubleTy(C)},
+                           false);
+}
+
+FunctionType *Make_X86FP80_X86FP80X86FP80(LLVMContext &C) {
+  return FunctionType::get(Type::getX86_FP80Ty(C),
+                           {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C)},
+                           false);
+}
+
+FunctionType *Make_Double_DoubleDoubleDouble(LLVMContext &C) {
+  return FunctionType::get(
+      Type::getDoubleTy(C),
+      {Type::getDoubleTy(C), Type::getDoubleTy(C), Type::getDoubleTy(C)},
+      false);
+}
+
+FunctionType *Make_X86FP80_X86FP80X86FP80X86FP80(LLVMContext &C) {
+  return FunctionType::get(
+      Type::getX86_FP80Ty(C),
+      {Type::getX86_FP80Ty(C), Type::getX86_FP80Ty(C),
+       Type::getX86_FP80Ty(C)},
+      false);
+}
+
+const KnownIntrinsic::WidenedIntrinsic KnownIntrinsic::kWidenedIntrinsics[] = {
+    // FIXME: Right now we ignore vector intrinsics.
+    // This is hard because we have to model the semantics of the intrinsics,
+    // e.g. llvm.x86.sse2.min.sd means extract first element, min, insert
+    // back.
+    // Intrinsics that take any non-vector FT types:
+    // NOTE: Right now because of https://bugs.llvm.org/show_bug.cgi?id=45399
+    // for f128 we need to use Make_X86FP80_X86FP80 (go to a lower precision
+    // and come back).
+ {"llvm.sqrt.f32", Intrinsic::sqrt, Make_Double_Double}, + {"llvm.sqrt.f64", Intrinsic::sqrt, Make_X86FP80_X86FP80}, + {"llvm.sqrt.f80", Intrinsic::sqrt, Make_X86FP80_X86FP80}, + {"llvm.powi.f32", Intrinsic::powi, Make_Double_DoubleI32}, + {"llvm.powi.f64", Intrinsic::powi, Make_X86FP80_X86FP80I32}, + {"llvm.powi.f80", Intrinsic::powi, Make_X86FP80_X86FP80I32}, + {"llvm.sin.f32", Intrinsic::sin, Make_Double_Double}, + {"llvm.sin.f64", Intrinsic::sin, Make_X86FP80_X86FP80}, + {"llvm.sin.f80", Intrinsic::sin, Make_X86FP80_X86FP80}, + {"llvm.cos.f32", Intrinsic::cos, Make_Double_Double}, + {"llvm.cos.f64", Intrinsic::cos, Make_X86FP80_X86FP80}, + {"llvm.cos.f80", Intrinsic::cos, Make_X86FP80_X86FP80}, + {"llvm.pow.f32", Intrinsic::pow, Make_Double_DoubleDouble}, + {"llvm.pow.f64", Intrinsic::pow, Make_X86FP80_X86FP80X86FP80}, + {"llvm.pow.f80", Intrinsic::pow, Make_X86FP80_X86FP80X86FP80}, + {"llvm.exp.f32", Intrinsic::exp, Make_Double_Double}, + {"llvm.exp.f64", Intrinsic::exp, Make_X86FP80_X86FP80}, + {"llvm.exp.f80", Intrinsic::exp, Make_X86FP80_X86FP80}, + {"llvm.exp2.f32", Intrinsic::exp2, Make_Double_Double}, + {"llvm.exp2.f64", Intrinsic::exp2, Make_X86FP80_X86FP80}, + {"llvm.exp2.f80", Intrinsic::exp2, Make_X86FP80_X86FP80}, + {"llvm.log.f32", Intrinsic::log, Make_Double_Double}, + {"llvm.log.f64", Intrinsic::log, Make_X86FP80_X86FP80}, + {"llvm.log.f80", Intrinsic::log, Make_X86FP80_X86FP80}, + {"llvm.log10.f32", Intrinsic::log10, Make_Double_Double}, + {"llvm.log10.f64", Intrinsic::log10, Make_X86FP80_X86FP80}, + {"llvm.log10.f80", Intrinsic::log10, Make_X86FP80_X86FP80}, + {"llvm.log2.f32", Intrinsic::log2, Make_Double_Double}, + {"llvm.log2.f64", Intrinsic::log2, Make_X86FP80_X86FP80}, + {"llvm.log2.f80", Intrinsic::log2, Make_X86FP80_X86FP80}, + {"llvm.fma.f32", Intrinsic::fma, Make_Double_DoubleDoubleDouble}, + {"llvm.fma.f64", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80}, + {"llvm.fma.f80", Intrinsic::fma, Make_X86FP80_X86FP80X86FP80X86FP80}, + {"llvm.fabs.f32", Intrinsic::fabs, Make_Double_Double}, + {"llvm.fabs.f64", Intrinsic::fabs, Make_X86FP80_X86FP80}, + {"llvm.fabs.f80", Intrinsic::fabs, Make_X86FP80_X86FP80}, + {"llvm.minnum.f32", Intrinsic::minnum, Make_Double_DoubleDouble}, + {"llvm.minnum.f64", Intrinsic::minnum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.minnum.f80", Intrinsic::minnum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.maxnum.f32", Intrinsic::maxnum, Make_Double_DoubleDouble}, + {"llvm.maxnum.f64", Intrinsic::maxnum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.maxnum.f80", Intrinsic::maxnum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.minimum.f32", Intrinsic::minimum, Make_Double_DoubleDouble}, + {"llvm.minimum.f64", Intrinsic::minimum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.minimum.f80", Intrinsic::minimum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.maximum.f32", Intrinsic::maximum, Make_Double_DoubleDouble}, + {"llvm.maximum.f64", Intrinsic::maximum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.maximum.f80", Intrinsic::maximum, Make_X86FP80_X86FP80X86FP80}, + {"llvm.copysign.f32", Intrinsic::copysign, Make_Double_DoubleDouble}, + {"llvm.copysign.f64", Intrinsic::copysign, Make_X86FP80_X86FP80X86FP80}, + {"llvm.copysign.f80", Intrinsic::copysign, Make_X86FP80_X86FP80X86FP80}, + {"llvm.floor.f32", Intrinsic::floor, Make_Double_Double}, + {"llvm.floor.f64", Intrinsic::floor, Make_X86FP80_X86FP80}, + {"llvm.floor.f80", Intrinsic::floor, Make_X86FP80_X86FP80}, + {"llvm.ceil.f32", Intrinsic::ceil, Make_Double_Double}, + {"llvm.ceil.f64", Intrinsic::ceil, Make_X86FP80_X86FP80}, + 
{"llvm.ceil.f80", Intrinsic::ceil, Make_X86FP80_X86FP80}, + {"llvm.trunc.f32", Intrinsic::trunc, Make_Double_Double}, + {"llvm.trunc.f64", Intrinsic::trunc, Make_X86FP80_X86FP80}, + {"llvm.trunc.f80", Intrinsic::trunc, Make_X86FP80_X86FP80}, + {"llvm.rint.f32", Intrinsic::rint, Make_Double_Double}, + {"llvm.rint.f64", Intrinsic::rint, Make_X86FP80_X86FP80}, + {"llvm.rint.f80", Intrinsic::rint, Make_X86FP80_X86FP80}, + {"llvm.nearbyint.f32", Intrinsic::nearbyint, Make_Double_Double}, + {"llvm.nearbyint.f64", Intrinsic::nearbyint, Make_X86FP80_X86FP80}, + {"llvm.nearbyin80f64", Intrinsic::nearbyint, Make_X86FP80_X86FP80}, + {"llvm.round.f32", Intrinsic::round, Make_Double_Double}, + {"llvm.round.f64", Intrinsic::round, Make_X86FP80_X86FP80}, + {"llvm.round.f80", Intrinsic::round, Make_X86FP80_X86FP80}, + {"llvm.lround.f32", Intrinsic::lround, Make_Double_Double}, + {"llvm.lround.f64", Intrinsic::lround, Make_X86FP80_X86FP80}, + {"llvm.lround.f80", Intrinsic::lround, Make_X86FP80_X86FP80}, + {"llvm.llround.f32", Intrinsic::llround, Make_Double_Double}, + {"llvm.llround.f64", Intrinsic::llround, Make_X86FP80_X86FP80}, + {"llvm.llround.f80", Intrinsic::llround, Make_X86FP80_X86FP80}, + {"llvm.lrint.f32", Intrinsic::lrint, Make_Double_Double}, + {"llvm.lrint.f64", Intrinsic::lrint, Make_X86FP80_X86FP80}, + {"llvm.lrint.f80", Intrinsic::lrint, Make_X86FP80_X86FP80}, + {"llvm.llrint.f32", Intrinsic::llrint, Make_Double_Double}, + {"llvm.llrint.f64", Intrinsic::llrint, Make_X86FP80_X86FP80}, + {"llvm.llrint.f80", Intrinsic::llrint, Make_X86FP80_X86FP80}, +}; + +const KnownIntrinsic::LFEntry KnownIntrinsic::kLibfuncIntrinsics[] = { + {LibFunc_sqrtf, "llvm.sqrt.f32"}, // + {LibFunc_sqrt, "llvm.sqrt.f64"}, // + {LibFunc_sqrtl, "llvm.sqrt.f80"}, // + {LibFunc_sinf, "llvm.sin.f32"}, // + {LibFunc_sin, "llvm.sin.f64"}, // + {LibFunc_sinl, "llvm.sin.f80"}, // + {LibFunc_cosf, "llvm.cos.f32"}, // + {LibFunc_cos, "llvm.cos.f64"}, // + {LibFunc_cosl, "llvm.cos.f80"}, // + {LibFunc_powf, "llvm.pow.f32"}, // + {LibFunc_pow, "llvm.pow.f64"}, // + {LibFunc_powl, "llvm.pow.f80"}, // + {LibFunc_expf, "llvm.exp.f32"}, // + {LibFunc_exp, "llvm.exp.f64"}, // + {LibFunc_expl, "llvm.exp.f80"}, // + {LibFunc_exp2f, "llvm.exp2.f32"}, // + {LibFunc_exp2, "llvm.exp2.f64"}, // + {LibFunc_exp2l, "llvm.exp2.f80"}, // + {LibFunc_logf, "llvm.log.f32"}, // + {LibFunc_log, "llvm.log.f64"}, // + {LibFunc_logl, "llvm.log.f80"}, // + {LibFunc_log10f, "llvm.log10.f32"}, // + {LibFunc_log10, "llvm.log10.f64"}, // + {LibFunc_log10l, "llvm.log10.f80"}, // + {LibFunc_log2f, "llvm.log2.f32"}, // + {LibFunc_log2, "llvm.log2.f64"}, // + {LibFunc_log2l, "llvm.log2.f80"}, // + {LibFunc_fabsf, "llvm.fabs.f32"}, // + {LibFunc_fabs, "llvm.fabs.f64"}, // + {LibFunc_fabsl, "llvm.fabs.f80"}, // + {LibFunc_copysignf, "llvm.copysign.f32"}, // + {LibFunc_copysign, "llvm.copysign.f64"}, // + {LibFunc_copysignl, "llvm.copysign.f80"}, // + {LibFunc_floorf, "llvm.floor.f32"}, // + {LibFunc_floor, "llvm.floor.f64"}, // + {LibFunc_floorl, "llvm.floor.f80"}, // + {LibFunc_fmaxf, "llvm.maxnum.f32"}, // + {LibFunc_fmax, "llvm.maxnum.f64"}, // + {LibFunc_fmaxl, "llvm.maxnum.f80"}, // + {LibFunc_fminf, "llvm.minnum.f32"}, // + {LibFunc_fmin, "llvm.minnum.f64"}, // + {LibFunc_fminl, "llvm.minnum.f80"}, // + {LibFunc_ceilf, "llvm.ceil.f32"}, // + {LibFunc_ceil, "llvm.ceil.f64"}, // + {LibFunc_ceill, "llvm.ceil.f80"}, // + {LibFunc_truncf, "llvm.trunc.f32"}, // + {LibFunc_trunc, "llvm.trunc.f64"}, // + {LibFunc_truncl, "llvm.trunc.f80"}, // + {LibFunc_rintf, 
"llvm.rint.f32"}, // + {LibFunc_rint, "llvm.rint.f64"}, // + {LibFunc_rintl, "llvm.rint.f80"}, // + {LibFunc_nearbyintf, "llvm.nearbyint.f32"}, // + {LibFunc_nearbyint, "llvm.nearbyint.f64"}, // + {LibFunc_nearbyintl, "llvm.nearbyint.f80"}, // + {LibFunc_roundf, "llvm.round.f32"}, // + {LibFunc_round, "llvm.round.f64"}, // + {LibFunc_roundl, "llvm.round.f80"}, // +}; + +const char *KnownIntrinsic::get(LibFunc LFunc) { + for (const auto &E : kLibfuncIntrinsics) { + if (E.LFunc == LFunc) + return E.IntrinsicName; + } + return nullptr; +} + +const KnownIntrinsic::WidenedIntrinsic *KnownIntrinsic::widen(StringRef Name) { + for (const auto &E : kWidenedIntrinsics) { + if (E.NarrowName == Name) + return &E; + } + return nullptr; +} + +} // namespace + +// Returns the name of the LLVM intrinsic corresponding to the given function. +static const char *getIntrinsicFromLibfunc(Function &Fn, Type *VT, + const TargetLibraryInfo &TLI) { + LibFunc LFunc; + if (!TLI.getLibFunc(Fn, LFunc)) + return nullptr; + + if (const char *Name = KnownIntrinsic::get(LFunc)) + return Name; + + errs() << "FIXME: LibFunc: " << TLI.getName(LFunc) << "\n"; + return nullptr; +} + +// Try to handle a known function call. +Value *NumericalStabilitySanitizer::maybeHandleKnownCallBase( + CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI, + const ValueToShadowMap &Map, IRBuilder<> &Builder) { + Function *const Fn = Call.getCalledFunction(); + if (Fn == nullptr) + return nullptr; + + Intrinsic::ID WidenedId = Intrinsic::ID(); + FunctionType *WidenedFnTy = nullptr; + if (const auto ID = Fn->getIntrinsicID()) { + const auto *const Widened = KnownIntrinsic::widen(Fn->getName()); + if (Widened) { + WidenedId = Widened->ID; + WidenedFnTy = Widened->MakeFnTy(*Context); + } else { + // If we don't know how to widen the intrinsic, we have no choice but to + // call the non-wide version on a truncated shadow and extend again + // afterwards. + WidenedId = ID; + WidenedFnTy = Fn->getFunctionType(); + } + } else if (const char *Name = getIntrinsicFromLibfunc(*Fn, VT, TLI)) { + // We might have a call to a library function that we can replace with a + // wider Intrinsic. + const auto *Widened = KnownIntrinsic::widen(Name); + assert(Widened && "make sure KnownIntrinsic entries are consistent"); + WidenedId = Widened->ID; + WidenedFnTy = Widened->MakeFnTy(*Context); + } else { + // This is not a known library function or intrinsic. + return nullptr; + } + + // Check that the widened intrinsic is valid. + SmallVector Table; + getIntrinsicInfoTableEntries(WidenedId, Table); + SmallVector ArgTys; + ArrayRef TableRef = Table; + const Intrinsic::MatchIntrinsicTypesResult Res = + Intrinsic::matchIntrinsicSignature(WidenedFnTy, TableRef, ArgTys); + assert(Res == Intrinsic::MatchIntrinsicTypes_Match && + "invalid widened intrinsic"); + (void)Res; + + // For known intrinsic functions, we create a second call to the same + // intrinsic with a different type. + SmallVector Args; + // The last operand is the intrinsic itself, skip it. + for (unsigned I = 0, E = Call.getNumOperands() - 1; I < E; ++I) { + Value *Arg = Call.getOperand(I); + Type *const OrigArgTy = Arg->getType(); + Type *const IntrinsicArgTy = WidenedFnTy->getParamType(I); + if (OrigArgTy == IntrinsicArgTy) { + Args.push_back(Arg); // The arg is passed as is. 
+      continue;
+    }
+    Type *const ShadowArgTy = Config.getExtendedFPType(Arg->getType());
+    assert(ShadowArgTy &&
+           "don't know how to get the shadow value for a non-FT");
+    Value *Shadow = Map.getShadow(Arg);
+    if (ShadowArgTy == IntrinsicArgTy) {
+      // The shadow is the right type for the intrinsic.
+      assert(Shadow->getType() == ShadowArgTy);
+      Args.push_back(Shadow);
+      continue;
+    }
+    // There is no intrinsic with this level of precision, truncate the
+    // shadow.
+    Args.push_back(
+        Builder.CreateCast(Instruction::FPTrunc, Shadow, IntrinsicArgTy));
+  }
+  Value *IntrinsicCall = Builder.CreateIntrinsic(WidenedId, ArgTys, Args);
+  return WidenedFnTy->getReturnType() == ExtendedVT
+             ? IntrinsicCall
+             : Builder.CreateCast(Instruction::FPExt, IntrinsicCall,
+                                  ExtendedVT);
+}
+
+// Handle a CallBase, i.e. a function call, an inline asm sequence, or an
+// invoke.
+Value *NumericalStabilitySanitizer::handleCallBase(
+    CallBase &Call, Type *VT, Type *ExtendedVT, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map, IRBuilder<> &Builder) {
+  // We cannot look inside inline asm, just expand the result again.
+  if (Call.isInlineAsm()) {
+    return Builder.CreateCast(Instruction::FPExt, &Call, ExtendedVT);
+  }
+
+  // Intrinsics and library functions (e.g. sin, exp) are handled
+  // specifically, because we know their semantics and can do better than
+  // blindly calling them (e.g. compute the sine in the actual shadow
+  // domain).
+  if (Value *V =
+          maybeHandleKnownCallBase(Call, VT, ExtendedVT, TLI, Map, Builder))
+    return V;
+
+  // If the return tag matches that of the called function, read the extended
+  // return value from the shadow ret ptr. Else, just extend the return
+  // value.
+  Value *HasShadowRet = Builder.CreateICmpEQ(
+      Builder.CreateLoad(IntptrTy, NsanShadowRetTag, /*isVolatile=*/false),
+      getCalleeAddress(Call, Builder));
+  Value *ShadowRetVal = Builder.CreateLoad(
+      ExtendedVT,
+      Builder.CreatePointerCast(
+          Builder.CreateConstGEP2_64(NsanShadowRetPtr, 0, 0),
+          ExtendedVT->getPointerTo()),
+      /*isVolatile=*/false);
+  Value *Shadow = Builder.CreateSelect(
+      HasShadowRet, ShadowRetVal,
+      Builder.CreateCast(Instruction::FPExt, &Call, ExtendedVT));
+  ++NumInstrumentedFTCalls;
+  // Note that we do not need to set NsanShadowRetTag to zero, as we know
+  // that either the function is not instrumented and it will never set
+  // NsanShadowRetTag; or it is and it will always do so.
+  return Shadow;
+}
+
+// Creates a shadow value for the given FT value. At that point all operands
+// are guaranteed to be available.
+Value *NumericalStabilitySanitizer::createShadowValueWithOperandsAvailable(
+    Instruction &Inst, const TargetLibraryInfo &TLI,
+    const ValueToShadowMap &Map) {
+  Type *const VT = Inst.getType();
+  Type *const ExtendedVT = Config.getExtendedFPType(VT);
+  assert(ExtendedVT != nullptr && "trying to create a shadow for a non-FT");
+
+  if (LoadInst *Load = dyn_cast<LoadInst>(&Inst)) {
+    return handleLoad(*Load, VT, ExtendedVT);
+  }
+  if (CallInst *Call = dyn_cast<CallInst>(&Inst)) {
+    // Insert after the call.
+    BasicBlock::iterator It(Inst);
+    IRBuilder<> Builder(Call->getParent(), ++It);
+    Builder.SetCurrentDebugLocation(Call->getDebugLoc());
+    return handleCallBase(*Call, VT, ExtendedVT, TLI, Map, Builder);
+  }
+  if (InvokeInst *Invoke = dyn_cast<InvokeInst>(&Inst)) {
+    // The Invoke terminates the basic block; create a new basic block in
+    // between the successful invoke and the next block.
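+    // This way the shadow computation only happens on the normal
+    // (non-unwind) path.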
+    BasicBlock *InvokeBB = Invoke->getParent();
+    BasicBlock *NextBB = Invoke->getNormalDest();
+    BasicBlock *NewBB =
+        BasicBlock::Create(*Context, "", NextBB->getParent(), NextBB);
+    Inst.replaceSuccessorWith(NextBB, NewBB);
+
+    IRBuilder<> Builder(NewBB);
+    Builder.SetCurrentDebugLocation(Invoke->getDebugLoc());
+    Value *Shadow = handleCallBase(*Invoke, VT, ExtendedVT, TLI, Map,
+                                   Builder);
+    Builder.CreateBr(NextBB);
+    NewBB->replaceSuccessorsPhiUsesWith(InvokeBB, NewBB);
+    return Shadow;
+  }
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*BinOp));
+    Builder.SetCurrentDebugLocation(BinOp->getDebugLoc());
+    return Builder.CreateBinOp(BinOp->getOpcode(),
+                               Map.getShadow(BinOp->getOperand(0)),
+                               Map.getShadow(BinOp->getOperand(1)));
+  }
+  if (UnaryOperator *UnaryOp = dyn_cast<UnaryOperator>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*UnaryOp));
+    Builder.SetCurrentDebugLocation(UnaryOp->getDebugLoc());
+    return Builder.CreateUnOp(UnaryOp->getOpcode(),
+                              Map.getShadow(UnaryOp->getOperand(0)));
+  }
+  if (FPTruncInst *Trunc = dyn_cast<FPTruncInst>(&Inst)) {
+    return handleTrunc(*Trunc, VT, ExtendedVT, Map);
+  }
+  if (FPExtInst *Ext = dyn_cast<FPExtInst>(&Inst)) {
+    return handleExt(*Ext, VT, ExtendedVT, Map);
+  }
+  if (isa<UIToFPInst>(&Inst) || isa<SIToFPInst>(&Inst)) {
+    CastInst *Cast = dyn_cast<CastInst>(&Inst);
+    IRBuilder<> Builder(getNextInstructionOrDie(*Cast));
+    Builder.SetCurrentDebugLocation(Cast->getDebugLoc());
+    return Builder.CreateCast(Cast->getOpcode(), Cast->getOperand(0),
+                              ExtendedVT);
+  }
+
+  if (SelectInst *S = dyn_cast<SelectInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*S));
+    Builder.SetCurrentDebugLocation(S->getDebugLoc());
+    return Builder.CreateSelect(S->getCondition(),
+                                Map.getShadow(S->getTrueValue()),
+                                Map.getShadow(S->getFalseValue()));
+  }
+
+  if (ExtractElementInst *Extract = dyn_cast<ExtractElementInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Extract));
+    Builder.SetCurrentDebugLocation(Extract->getDebugLoc());
+    return Builder.CreateExtractElement(
+        Map.getShadow(Extract->getVectorOperand()),
+        Extract->getIndexOperand());
+  }
+
+  if (InsertElementInst *Insert = dyn_cast<InsertElementInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Insert));
+    Builder.SetCurrentDebugLocation(Insert->getDebugLoc());
+    return Builder.CreateInsertElement(Map.getShadow(Insert->getOperand(0)),
+                                       Map.getShadow(Insert->getOperand(1)),
+                                       Insert->getOperand(2));
+  }
+
+  if (ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Shuffle));
+    Builder.SetCurrentDebugLocation(Shuffle->getDebugLoc());
+    return Builder.CreateShuffleVector(Map.getShadow(Shuffle->getOperand(0)),
+                                       Map.getShadow(Shuffle->getOperand(1)),
+                                       Shuffle->getShuffleMask());
+  }
+
+  if (ExtractValueInst *Extract = dyn_cast<ExtractValueInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*Extract));
+    Builder.SetCurrentDebugLocation(Extract->getDebugLoc());
+    // FIXME: We could make aggregate objects first-class citizens. For now
+    // we just extend the extracted value.
+    return Builder.CreateCast(Instruction::FPExt, Extract, ExtendedVT);
+  }
+
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(&Inst)) {
+    IRBuilder<> Builder(getNextInstructionOrDie(*BC));
+    Builder.SetCurrentDebugLocation(BC->getDebugLoc());
+    return Builder.CreateCast(Instruction::FPExt, BC, ExtendedVT);
+  }
+
+  errs() << "FIXME: implement " << Inst.getOpcodeName() << "\n";
+  llvm_unreachable("not implemented");
+}
+
+// Creates a shadow value for an instruction that defines a value of FT type.
+// Shadow values for FT operands that do not already have one are created
+// recursively. The DFS is guaranteed to not loop as phis and arguments
+// already have shadows.
+void NumericalStabilitySanitizer::maybeCreateShadowValue(
+    Instruction &Root, const TargetLibraryInfo &TLI, ValueToShadowMap &Map) {
+  Type *const VT = Root.getType();
+  Type *const ExtendedVT = Config.getExtendedFPType(VT);
+  if (ExtendedVT == nullptr)
+    return; // Not an FT value.
+
+  if (Map.hasShadow(&Root))
+    return; // Shadow already exists.
+
+  assert(!isa<PHINode>(&Root) && "phi nodes should already have shadows");
+
+  std::vector<Instruction *> DfsStack(1, &Root);
+  while (!DfsStack.empty()) {
+    // Ensure that all operands to the instruction have shadows before
+    // proceeding.
+    Instruction *I = DfsStack.back();
+    // The shadow for the instruction might have been created deeper in the
+    // DFS, see `forward_use_with_two_uses` test.
+    if (Map.hasShadow(I)) {
+      DfsStack.pop_back();
+      continue;
+    }
+
+    bool MissingShadow = false;
+    for (Value *Op : I->operands()) {
+      Type *const VT = Op->getType();
+      if (!Config.getExtendedFPType(VT))
+        continue; // Not an FT value.
+      if (Map.hasShadow(Op))
+        continue; // Shadow is already available.
+      assert(isa<Instruction>(Op) &&
+             "non-instructions should already have shadows");
+      assert(!isa<PHINode>(Op) && "phi nodes should already have shadows");
+      MissingShadow = true;
+      DfsStack.push_back(dyn_cast<Instruction>(Op));
+    }
+    if (MissingShadow)
+      continue; // Process operands and come back to this instruction later.
+
+    // All operands have shadows. Create a shadow for the current value.
+    Value *Shadow = createShadowValueWithOperandsAvailable(*I, TLI, Map);
+    Map.setShadow(I, Shadow);
+    DfsStack.pop_back();
+  }
+}
+
+// A floating-point store needs its value and type written to shadow memory.
+void NumericalStabilitySanitizer::propagateFTStore(
+    StoreInst &Store, Type *const VT, Type *const ExtendedVT,
+    const ValueToShadowMap &Map) {
+  Value *StoredValue = Store.getValueOperand();
+  IRBuilder<> Builder(&Store);
+  Builder.SetCurrentDebugLocation(Store.getDebugLoc());
+  const auto Extents = getMemoryExtentsOrDie(VT);
+  Value *ShadowPtr =
+      Builder.CreateCall(NsanGetShadowPtrForStore[Extents.ValueType],
+                         {Builder.CreatePointerCast(Store.getPointerOperand(),
+                                                    Builder.getInt8PtrTy()),
+                          ConstantInt::get(IntptrTy, Extents.NumElts)});
+
+  Value *StoredShadow = Map.getShadow(StoredValue);
+  if (!Store.getParent()->getParent()->hasOptNone()) {
+    // Only check stores when optimizing, because non-optimized code
+    // generates too many stores to the stack, creating false positives.
+    StoredShadow = emitCheck(StoredValue, StoredShadow, Builder,
+                             CheckLoc::makeStore(Store.getPointerOperand()));
+    ++NumInstrumentedFTStores;
+  }
+
+  Builder.CreateAlignedStore(
+      StoredShadow,
+      Builder.CreatePointerCast(ShadowPtr, ExtendedVT->getPointerTo()),
+      Align(1), Store.isVolatile());
+}
+
+// A non-FT store needs to invalidate shadow memory. Exceptions are:
+//  - memory transfers of floating-point data through other pointer types
+//    (llvm optimization passes transform `*(float*)a = *(float*)b` into
+//    `*(i32*)a = *(i32*)b`). These have the same semantics as memcpy.
+//  - Writes of FT-sized constants. LLVM likes to do float stores as
+//    bitcasted ints. Note that this is not really necessary because if the
+//    value is unknown the framework will re-extend it on load anyway. It
+//    just felt easier to debug tests with vectors of FTs.
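+//    For instance, clang can emit `store i32 1065353216, i32* %p` for
+//    `*(float *)p = 1.0f` (0x3f800000 is the bit pattern of 1.0f); in that
+//    case we bitcast and store the extended shadow instead of invalidating.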
+void NumericalStabilitySanitizer::propagateNonFTStore(
+ StoreInst &Store, Type *const VT, const ValueToShadowMap &Map) {
+ Value *PtrOp = Store.getPointerOperand();
+ IRBuilder<> Builder(getNextInstructionOrDie(Store));
+ Builder.SetCurrentDebugLocation(Store.getDebugLoc());
+ Value *Dst = Builder.CreatePointerCast(PtrOp, Builder.getInt8PtrTy());
+ const DataLayout &DL =
+ Store.getParent()->getParent()->getParent()->getDataLayout();
+ TypeSize SlotSize = DL.getTypeStoreSize(VT);
+ assert(!SlotSize.isScalable() && "unsupported");
+ const auto LoadSizeBytes = SlotSize.getFixedSize();
+ Value *ValueSize = Builder.Insert(Constant::getIntegerValue(
+ IntptrTy, APInt(IntptrTy->getPrimitiveSizeInBits(), LoadSizeBytes)));
+
+ ++NumInstrumentedNonFTStores;
+ Value *StoredValue = Store.getValueOperand();
+ if (LoadInst *Load = dyn_cast<LoadInst>(StoredValue)) {
+ // FIXME: Handle the case when the value is from a phi.
+ // This is a memory transfer with memcpy semantics. Copy the type and
+ // value from the source. Note that we cannot use __nsan_copy_values()
+ // here, because that will not work when there is a write to memory in
+ // between the load and the store, e.g. in the case of a swap.
+ Type *ShadowTypeIntTy = Type::getIntNTy(*Context, 8 * LoadSizeBytes);
+ Type *ShadowValueIntTy =
+ Type::getIntNTy(*Context, 8 * kShadowScale * LoadSizeBytes);
+ IRBuilder<> LoadBuilder(getNextInstructionOrDie(*Load));
+ LoadBuilder.SetCurrentDebugLocation(Load->getDebugLoc());
+ Value *LoadSrc = LoadBuilder.CreatePointerCast(Load->getPointerOperand(),
+ Builder.getInt8PtrTy());
+ // Read the shadow type and value at load time. The type has the same size
+ // as the FT value, the value has twice its size.
+ // FIXME: cache them to avoid re-creating them when a load is used by
+ // several stores. Maybe create them like the FT shadows when a load is
+ // encountered.
+ Value *RawShadowType = LoadBuilder.CreateAlignedLoad(
+ ShadowTypeIntTy,
+ LoadBuilder.CreatePointerCast(
+ LoadBuilder.CreateCall(NsanGetRawShadowTypePtr, {LoadSrc}),
+ ShadowTypeIntTy->getPointerTo()),
+ Align(1),
+ /*isVolatile=*/false);
+ Value *RawShadowValue = LoadBuilder.CreateAlignedLoad(
+ ShadowValueIntTy,
+ LoadBuilder.CreatePointerCast(
+ LoadBuilder.CreateCall(NsanGetRawShadowPtr, {LoadSrc}),
+ ShadowValueIntTy->getPointerTo()),
+ Align(1),
+ /*isVolatile=*/false);
+
+ // Write back the shadow type and value at store time.
+ Builder.CreateAlignedStore(
+ RawShadowType,
+ Builder.CreatePointerCast(
+ Builder.CreateCall(NsanGetRawShadowTypePtr, {Dst}),
+ ShadowTypeIntTy->getPointerTo()),
+ Align(1),
+ /*isVolatile=*/false);
+ Builder.CreateAlignedStore(
+ RawShadowValue,
+ Builder.CreatePointerCast(
+ Builder.CreateCall(NsanGetRawShadowPtr, {Dst}),
+ ShadowValueIntTy->getPointerTo()),
+ Align(1),
+ /*isVolatile=*/false);
+
+ ++NumInstrumentedNonFTMemcpyStores;
+ return;
+ }
+ if (Constant *C = dyn_cast<Constant>(StoredValue)) {
+ // This might be an FP constant stored as an integer. Bitcast and store if
+ // it has the appropriate size.
+ Type *BitcastTy = nullptr; // The FT type to bitcast to.
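+ // Map the integer width to the FP type of the same size: i32 -> float,
+ // i64 -> double, i80 -> x86_fp80 (and elementwise for constant vectors).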
+ if (ConstantInt *CInt = dyn_cast<ConstantInt>(C)) {
+ switch (CInt->getType()->getScalarSizeInBits()) {
+ case 32:
+ BitcastTy = Type::getFloatTy(*Context);
+ break;
+ case 64:
+ BitcastTy = Type::getDoubleTy(*Context);
+ break;
+ case 80:
+ BitcastTy = Type::getX86_FP80Ty(*Context);
+ break;
+ default:
+ break;
+ }
+ } else if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C)) {
+ const int NumElements =
+ cast<VectorType>(CDV->getType())->getElementCount().getFixedValue();
+ switch (CDV->getType()->getScalarSizeInBits()) {
+ case 32:
+ BitcastTy =
+ VectorType::get(Type::getFloatTy(*Context), NumElements, false);
+ break;
+ case 64:
+ BitcastTy =
+ VectorType::get(Type::getDoubleTy(*Context), NumElements, false);
+ break;
+ case 80:
+ BitcastTy =
+ VectorType::get(Type::getX86_FP80Ty(*Context), NumElements, false);
+ break;
+ default:
+ break;
+ }
+ }
+ if (BitcastTy) {
+ const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy);
+ Value *ShadowPtr = Builder.CreateCall(
+ NsanGetShadowPtrForStore[Extents.ValueType],
+ {Builder.CreatePointerCast(PtrOp, Builder.getInt8PtrTy()),
+ ConstantInt::get(IntptrTy, Extents.NumElts)});
+ // Bitcast the integer value to the appropriate FT type and extend to 2FT.
+ Type *ExtVT = Config.getExtendedFPType(BitcastTy);
+ Value *Shadow = Builder.CreateCast(
+ Instruction::FPExt, Builder.CreateBitCast(C, BitcastTy), ExtVT);
+ Builder.CreateAlignedStore(
+ Shadow, Builder.CreatePointerCast(ShadowPtr, ExtVT->getPointerTo()),
+ Align(1), Store.isVolatile());
+ return;
+ }
+ }
+ // All other stores just reset the shadow value to unknown.
+ Builder.CreateCall(NsanSetValueUnknown, {Dst, ValueSize});
+}
+
+void NumericalStabilitySanitizer::propagateShadowValues(
+ Instruction &Inst, const TargetLibraryInfo &TLI,
+ const ValueToShadowMap &Map) {
+ if (StoreInst *Store = dyn_cast<StoreInst>(&Inst)) {
+ Value *StoredValue = Store->getValueOperand();
+ Type *const VT = StoredValue->getType();
+ Type *const ExtendedVT = Config.getExtendedFPType(VT);
+ if (ExtendedVT == nullptr)
+ return propagateNonFTStore(*Store, VT, Map);
+ return propagateFTStore(*Store, VT, ExtendedVT, Map);
+ }
+
+ if (FCmpInst *FCmp = dyn_cast<FCmpInst>(&Inst)) {
+ emitFCmpCheck(*FCmp, Map);
+ return;
+ }
+
+ if (CallBase *CB = dyn_cast<CallBase>(&Inst)) {
+ maybeAddSuffixForNsanInterface(CB);
+ if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+ maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst)) {
+ instrumentMemIntrinsic(MI);
+ return;
+ }
+ populateShadowStack(*CB, TLI, Map);
+ return;
+ }
+
+ if (ReturnInst *RetInst = dyn_cast<ReturnInst>(&Inst)) {
+ Value *RV = RetInst->getReturnValue();
+ if (RV == nullptr)
+ return; // This is a `ret void`.
+ Type *const VT = RV->getType();
+ Type *const ExtendedVT = Config.getExtendedFPType(VT);
+ if (ExtendedVT == nullptr)
+ return; // Not an FT ret.
+ Value *RVShadow = Map.getShadow(RV);
+ IRBuilder<> Builder(&Inst);
+ Builder.SetCurrentDebugLocation(RetInst->getDebugLoc());
+ RVShadow = emitCheck(RV, RVShadow, Builder, CheckLoc::makeRet());
+ ++NumInstrumentedFTRets;
+ // Store tag.
+ Value *FnAddr =
+ Builder.CreatePtrToInt(Inst.getParent()->getParent(), IntptrTy);
+ Builder.CreateStore(FnAddr, NsanShadowRetTag);
+ // Store value.
+ Value *ShadowRetValPtr = Builder.CreatePointerCast(
+ Builder.CreateConstGEP2_64(NsanShadowRetPtr, 0, 0),
+ ExtendedVT->getPointerTo());
+ Builder.CreateStore(RVShadow, ShadowRetValPtr);
+ return;
+ }
+
+ if (InsertValueInst *Insert = dyn_cast<InsertValueInst>(&Inst)) {
+ Value *V = Insert->getOperand(1);
+ Type *const VT = V->getType();
+ Type *const ExtendedVT = Config.getExtendedFPType(VT);
+ if (ExtendedVT == nullptr)
+ return;
+ IRBuilder<> Builder(Insert);
+ Builder.SetCurrentDebugLocation(Insert->getDebugLoc());
+ emitCheck(V, Map.getShadow(V), Builder, CheckLoc::makeInsert());
+ return;
+ }
+}
+
+// Moves fast math flags from the function to individual instructions, and
+// removes the attribute from the function.
+// FIXME: Make this controllable with a flag.
+static void moveFastMathFlags(Function &F,
+ std::vector<Instruction *> &Instructions) {
+ FastMathFlags FMF;
+#define MOVE_FLAG(attr, setter) \
+ if (F.getFnAttribute(attr).getValueAsString() == "true") { \
+ F.removeAttribute(AttributeList::FunctionIndex, attr); \
+ FMF.set##setter(); \
+ }
+ MOVE_FLAG("unsafe-fp-math", Fast)
+ MOVE_FLAG("no-infs-fp-math", NoInfs)
+ MOVE_FLAG("no-nans-fp-math", NoNaNs)
+ MOVE_FLAG("no-signed-zeros-fp-math", NoSignedZeros)
+#undef MOVE_FLAG
+
+ for (Instruction *I : Instructions)
+ if (isa<FPMathOperator>(I))
+ I->setFastMathFlags(FMF);
+}
+
+bool NumericalStabilitySanitizer::sanitizeFunction(
+ Function &F, const TargetLibraryInfo &TLI) {
+ // This is required to prevent instrumenting call to __nsan_init from within
+ // the module constructor.
+ if (F.getName() == kNsanModuleCtorName)
+ return false;
+ if (!Config.initialize(&F.getParent()->getContext()))
+ return false;
+ initialize(*F.getParent());
+ SmallVector<Instruction *, 8> AllLoadsAndStores;
+ SmallVector<Instruction *, 8> LocalLoadsAndStores;
+ if (!F.hasFnAttribute(Attribute::SanitizeNumericalStability))
+ return false;
+
+ // The instrumentation maintains:
+ // - for each IR value `v` of floating-point (or vector floating-point) type
+ // FT, a shadow IR value `s(v)` with twice the precision 2FT (e.g.
+ // double for float and fp128 for double).
+ // - A shadow memory, which stores `s(v)` for any `v` that has been stored,
+ // along with a shadow memory tag, which stores whether the value in the
+ // corresponding shadow memory is valid. Note that this might be
+ // incorrect if a non-instrumented function stores to memory, or if
+ // memory is stored to through a char pointer.
+ // - A shadow stack, which holds `s(v)` for any floating-point argument `v`
+ // of a call to an instrumented function. This allows
+ // instrumented functions to retrieve the shadow values for their
+ // arguments.
+ // Because instrumented functions can be called from non-instrumented
+ // functions, the stack needs to include a tag so that the instrumented
+ // function knows whether shadow values are available for its
+ // parameters (i.e. whether it was called by an instrumented function).
+ // When shadow arguments are not available, they have to be recreated by
+ // extending the precision of the non-shadow arguments to the non-shadow
+ // value. Non-instrumented functions do not modify (or even know about) the
+ // shadow stack. The shadow stack pointer is __nsan_shadow_args. The shadow
+ // stack tag is __nsan_shadow_args_tag. The tag is any unique identifier
+ // for the function (we use the address of the function). Both variables
+ // are thread local.
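+ // Conceptually (a sketch of the emitted logic, not literal code from this
+ // pass), the prologue of an instrumented function f(float a) behaves like:
+ //   if (__nsan_shadow_args_tag == (uintptr_t)&f)
+ //     s(a) = __nsan_shadow_args[0]; // the caller pushed a shadow for `a`
+ //   else
+ //     s(a) = (double)a;             // no shadow available, re-extend
+ //   __nsan_shadow_args_tag = 0;     // consume the tag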
+ // Example:
+ //   calls                              shadow stack tag    shadow stack
+ //   =====================================================================
+ //   non_instrumented_1()               0                   0
+ //       |
+ //       v
+ //   instrumented_2(float a)            0                   0
+ //       |
+ //       v
+ //   instrumented_3(float b, double c)  &instrumented_3     s(b),s(c)
+ //       |
+ //       v
+ //   instrumented_4(float d)            &instrumented_4     s(d)
+ //       |
+ //       v
+ //   non_instrumented_5(float e)        &non_instrumented_5 s(e)
+ //       |
+ //       v
+ //   instrumented_6(float f)            &non_instrumented_5 s(e)
+ //
+ // On entry, instrumented_2 checks whether the tag corresponds to its
+ // function ptr.
+ // Note that functions reset the tag to 0 after reading shadow parameters.
+ // This ensures that the function does not erroneously read invalid data if
+ // called twice in the same stack, once from an instrumented function and
+ // once from an uninstrumented one. For example, in the following example,
+ // resetting the tag in (A) ensures that (B) does not reuse the same
+ // shadow arguments (which would be incorrect).
+ //   instrumented_1(float a)
+ //       |
+ //       v
+ //   instrumented_2(float b) (A)
+ //       |
+ //       v
+ //   non_instrumented_3()
+ //       |
+ //       v
+ //   instrumented_2(float b) (B)
+ //
+ // - A shadow return slot. Any function that returns a floating-point value
+ // places a shadow return value in __nsan_shadow_ret_val. Again, because
+ // we might be calling non-instrumented functions, this value is guarded
+ // by a __nsan_shadow_ret_tag marker indicating which instrumented function
+ // placed the value in __nsan_shadow_ret_val, so that the caller can check
+ // that this corresponds to the callee. Both variables are thread local.
+ //
+ // For example, in the following example, the instrumentation in
+ // `instrumented_1` rejects the shadow return value from `instrumented_3`
+ // because it is not tagged as expected (`&instrumented_3` instead of
+ // `&non_instrumented_2`):
+ //
+ //   instrumented_1()
+ //       |
+ //       v
+ //   float non_instrumented_2()
+ //       |
+ //       v
+ //   float instrumented_3()
+ //
+ // Calls of known math functions (sin, cos, exp, ...) are duplicated to call
+ // their overload on the shadow type.
+
+ // Collect all instructions before processing, as creating shadow values
+ // creates new instructions inside the function.
+ std::vector<Instruction *> OriginalInstructions;
+ for (auto &BB : F) {
+ for (auto &Inst : BB) {
+ OriginalInstructions.emplace_back(&Inst);
+ }
+ }
+
+ moveFastMathFlags(F, OriginalInstructions);
+ ValueToShadowMap ValueToShadow(&Config);
+
+ // In the first pass, we create shadow values for all FT function arguments
+ // and all phis. This ensures that the DFS of the next pass does not have
+ // any loops.
+ std::vector<PHINode *> OriginalPhis;
+ createShadowArguments(F, TLI, ValueToShadow);
+ for (Instruction *I : OriginalInstructions) {
+ if (PHINode *Phi = dyn_cast<PHINode>(I)) {
+ if (PHINode *Shadow = maybeCreateShadowPhi(*Phi, TLI)) {
+ OriginalPhis.push_back(Phi);
+ ValueToShadow.setShadow(Phi, Shadow);
+ }
+ }
+ }
+
+ // Create shadow values for all instructions creating FT values.
+ for (Instruction *I : OriginalInstructions) {
+ maybeCreateShadowValue(*I, TLI, ValueToShadow);
+ }
+
+ // Propagate shadow values across stores, calls and rets.
+ for (Instruction *I : OriginalInstructions) {
+ propagateShadowValues(*I, TLI, ValueToShadow);
+ }
+
+ // The last pass populates shadow phis with shadow values.
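+ // (This has to run last: an incoming shadow may itself be an instruction
+ // shadow that is only created by the passes above.)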
+ for (PHINode *Phi : OriginalPhis) {
+ PHINode *ShadowPhi = dyn_cast<PHINode>(ValueToShadow.getShadow(Phi));
+ for (int I = 0, E = Phi->getNumOperands(); I < E; ++I) {
+ Value *V = Phi->getOperand(I);
+ Value *Shadow = ValueToShadow.getShadow(V);
+ BasicBlock *IncomingBB = Phi->getIncomingBlock(I);
+ // For some instructions (e.g. invoke), we create the shadow in a separate
+ // block, different from the block where the original value is created.
+ // In that case, the shadow phi might need to refer to this block instead
+ // of the original block.
+ // Note that this can only happen for instructions as constant shadows are
+ // always created in the same block.
+ ShadowPhi->addIncoming(Shadow, IncomingBB);
+ }
+ }
+
+ return !ValueToShadow.empty();
+}
+
+// Instrument the memory intrinsics so that they properly modify the shadow
+// memory.
+bool NumericalStabilitySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> Builder(MI);
+ if (MemSetInst *M = dyn_cast<MemSetInst>(MI)) {
+ Builder.SetCurrentDebugLocation(M->getDebugLoc());
+ Builder.CreateCall(
+ NsanSetValueUnknown,
+ {// Address
+ Builder.CreatePointerCast(M->getArgOperand(0), Builder.getInt8PtrTy()),
+ // Size
+ Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+ } else if (MemTransferInst *M = dyn_cast<MemTransferInst>(MI)) {
+ Builder.SetCurrentDebugLocation(M->getDebugLoc());
+ Builder.CreateCall(
+ NsanCopyValues,
+ {// Destination
+ Builder.CreatePointerCast(M->getArgOperand(0), Builder.getInt8PtrTy()),
+ // Source
+ Builder.CreatePointerCast(M->getArgOperand(1), Builder.getInt8PtrTy()),
+ // Size
+ Builder.CreateIntCast(M->getArgOperand(2), IntptrTy, false)});
+ }
+ return false;
+}
+
+void NumericalStabilitySanitizer::maybeAddSuffixForNsanInterface(CallBase *CI) {
+ Function *Fn = CI->getCalledFunction();
+ if (Fn == nullptr)
+ return;
+
+ if (!Fn->getName().startswith("__nsan_"))
+ return;
+
+ if (Fn->getName() == "__nsan_dump_shadow_mem") {
+ assert(CI->getNumArgOperands() == 4 &&
+ "invalid prototype for __nsan_dump_shadow_mem");
+ // __nsan_dump_shadow_mem requires an extra parameter with the dynamic
+ // configuration:
+ // (shadow_type_id_for_long_double << 16) | (shadow_type_id_for_double << 8)
+ // | shadow_type_id_for_float
+ const uint64_t shadow_value_type_ids =
+ (static_cast<uint64_t>(Config.byValueType(kLongDouble).getNsanTypeId())
+ << 16) |
+ (static_cast<uint64_t>(Config.byValueType(kDouble).getNsanTypeId())
+ << 8) |
+ static_cast<uint64_t>(Config.byValueType(kFloat).getNsanTypeId());
+ CI->setArgOperand(3, ConstantInt::get(IntptrTy, shadow_value_type_ids));
+ }
+}
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -961,6 +961,7 @@
     case Attribute::ShadowCallStack:
     case Attribute::SanitizeAddress:
     case Attribute::SanitizeMemory:
+    case Attribute::SanitizeNumericalStability:
     case Attribute::SanitizeThread:
     case Attribute::SanitizeHWAddress:
     case Attribute::SanitizeMemTag:
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/basic.ll
@@ -0,0 +1,965 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -nsan -nsan-shadow-type-mapping=dqq -nsan-truncate-fcmp-eq=false -S | FileCheck %s --check-prefixes=CHECK,DQQ
+; RUN: opt < %s -nsan -nsan-shadow-type-mapping=dlq 
-nsan-truncate-fcmp-eq=false -S | FileCheck %s --check-prefixes=CHECK,DLQ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests with simple control flow. + +@float_const = private unnamed_addr constant float 0.5 +@x86_fp80_const = private unnamed_addr constant x86_fp80 0xK3FC9E69594BEC44DE000 +@double_const = private unnamed_addr constant double 0.5 + + +define float @return_param_float(float %a) sanitize_numericalstability { +; CHECK-LABEL: @return_param_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float (float)* @return_param_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[A]], double [[TMP4]], i32 1, i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[A]] to double +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP4]] +; CHECK-NEXT: store i64 ptrtoint (float (float)* @return_param_float to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP8]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[A]] +; +entry: + ret float %a +} + +; Note that the shadow fadd should not have a `fast` flag. 
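+; (The pass builds shadow instructions with a fresh IRBuilder and does not
+; copy the original instruction's fast-math flags onto them, so the shadow
+; computation is not relaxed.)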
+define float @param_add_return_float(float %a) sanitize_numericalstability { +; CHECK-LABEL: @param_add_return_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float (float)* @param_add_return_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (float (float)* @param_add_return_float to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP9]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[B]] +; +entry: + %b = fadd fast float %a, 1.0 + ret float %b +} + +define x86_fp80 @param_add_return_x86_fp80(x86_fp80 %a) sanitize_numericalstability { +; CHECK-LABEL: @param_add_return_x86_fp80( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (x86_fp80 (x86_fp80)* @param_add_return_x86_fp80 to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load fp128, fp128* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to fp128*), align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext x86_fp80 [[A:%.*]] to fp128 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd x86_fp80 [[A]], 0xK3FC9E69594BEC44DE000 +; CHECK-NEXT: [[TMP5:%.*]] = fadd fp128 [[TMP4]], 0xLC0000000000000003FC9CD2B297D889B +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[B]], fp128 [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext x86_fp80 [[B]] to fp128 +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], fp128 [[TMP8]], fp128 [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (x86_fp80 (x86_fp80)* @param_add_return_x86_fp80 to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store fp128 [[TMP9]], fp128* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to fp128*), align 16 +; CHECK-NEXT: ret x86_fp80 [[B]] +; +entry: + %b = fadd x86_fp80 %a, 0xK3FC9E69594BEC44DE000 + ret x86_fp80 %b +} + +define double @param_add_return_double(double %a) sanitize_numericalstability { +; DQQ-LABEL: @param_add_return_double( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (double (double)* @param_add_return_double to i64) +; DQQ-NEXT: [[TMP2:%.*]] = load fp128, fp128* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to fp128*), align 1 +; DQQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to fp128 +; DQQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]] +; DQQ-NEXT: store i64 0, i64* 
@__nsan_shadow_args_tag, align 8
+; DQQ-NEXT: [[B:%.*]] = fadd double [[A]], 1.000000e+00
+; DQQ-NEXT: [[TMP5:%.*]] = fadd fp128 [[TMP4]], 0xL00000000000000003FFF000000000000
+; DQQ-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_double_q(double [[B]], fp128 [[TMP5]], i32 1, i64 0)
+; DQQ-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; DQQ-NEXT: [[TMP8:%.*]] = fpext double [[B]] to fp128
+; DQQ-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], fp128 [[TMP8]], fp128 [[TMP5]]
+; DQQ-NEXT: store i64 ptrtoint (double (double)* @param_add_return_double to i64), i64* @__nsan_shadow_ret_tag, align 8
+; DQQ-NEXT: store fp128 [[TMP9]], fp128* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to fp128*), align 16
+; DQQ-NEXT: ret double [[B]]
+;
+; DLQ-LABEL: @param_add_return_double(
+; DLQ-NEXT: entry:
+; DLQ-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (double (double)* @param_add_return_double to i64)
+; DLQ-NEXT: [[TMP2:%.*]] = load x86_fp80, x86_fp80* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to x86_fp80*), align 1
+; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to x86_fp80
+; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], x86_fp80 [[TMP2]], x86_fp80 [[TMP3]]
+; DLQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8
+; DLQ-NEXT: [[B:%.*]] = fadd double [[A]], 1.000000e+00
+; DLQ-NEXT: [[TMP5:%.*]] = fadd x86_fp80 [[TMP4]], 0xK3FFF8000000000000000
+; DLQ-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_double_l(double [[B]], x86_fp80 [[TMP5]], i32 1, i64 0)
+; DLQ-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1
+; DLQ-NEXT: [[TMP8:%.*]] = fpext double [[B]] to x86_fp80
+; DLQ-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], x86_fp80 [[TMP8]], x86_fp80 [[TMP5]]
+; DLQ-NEXT: store i64 ptrtoint (double (double)* @param_add_return_double to i64), i64* @__nsan_shadow_ret_tag, align 8
+; DLQ-NEXT: store x86_fp80 [[TMP9]], x86_fp80* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to x86_fp80*), align 16
+; DLQ-NEXT: ret double [[B]]
+;
+entry:
+ %b = fadd double %a, 1.0
+ ret double %b
+}
+
+define <2 x float> @return_param_add_return_float_vector(<2 x float> %a) sanitize_numericalstability {
+; CHECK-LABEL: @return_param_add_return_float_vector(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (<2 x float> (<2 x float>)* @return_param_add_return_float_vector to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <2 x double>*), align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[A:%.*]] to <2 x double>
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]]
+; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[B:%.*]] = fadd <2 x float> [[A]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[B]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP6]], double [[TMP7]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[B]], i64 1
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[TMP5]], i64 1
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP9]], double [[TMP10]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP8]], [[TMP11]]
+; 
CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = fpext <2 x float> [[B]] to <2 x double>
+; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP13]], <2 x double> [[TMP14]], <2 x double> [[TMP5]]
+; CHECK-NEXT: store i64 ptrtoint (<2 x float> (<2 x float>)* @return_param_add_return_float_vector to i64), i64* @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store <2 x double> [[TMP15]], <2 x double>* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to <2 x double>*), align 16
+; CHECK-NEXT: ret <2 x float> [[B]]
+;
+entry:
+ %b = fadd <2 x float> %a, <float 1.0, float 1.0>
+ ret <2 x float> %b
+}
+
+; TODO: This is ignored for now.
+define [2 x float] @return_param_float_array([2 x float] %a) sanitize_numericalstability {
+; CHECK-LABEL: @return_param_float_array(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret [2 x float] [[A:%.*]]
+;
+entry:
+ ret [2 x float] %a
+}
+
+define void @constantload_add_store_float(float* %dst) sanitize_numericalstability {
+; CHECK-LABEL: @constantload_add_store_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load float, float* @float_const
+; CHECK-NEXT: [[TMP0:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00
+; CHECK-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[DST:%.*]] to i8*
+; CHECK-NEXT: [[TMP3:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP2]], i64 1)
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint float* [[DST]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP1]], i32 4, i64 [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP1]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP3]] to double*
+; CHECK-NEXT: store double [[TMP8]], double* [[TMP9]], align 1
+; CHECK-NEXT: store float [[C]], float* [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load float, float* @float_const
+ %c = fadd float %b, 1.0
+ store float %c, float* %dst, align 1
+ ret void
+}
+
+define void @constantload_add_store_x86_fp80(x86_fp80* %dst) sanitize_numericalstability {
+; CHECK-LABEL: @constantload_add_store_x86_fp80(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = load x86_fp80, x86_fp80* @x86_fp80_const
+; CHECK-NEXT: [[TMP0:%.*]] = fpext x86_fp80 [[B]] to fp128
+; CHECK-NEXT: [[C:%.*]] = fadd x86_fp80 [[B]], 0xK3FC9E69594BEC44DE000
+; CHECK-NEXT: [[TMP1:%.*]] = fadd fp128 [[TMP0]], 0xLC0000000000000003FC9CD2B297D889B
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast x86_fp80* [[DST:%.*]] to i8*
+; CHECK-NEXT: [[TMP3:%.*]] = call i8* @__nsan_get_shadow_ptr_for_longdouble_store(i8* [[TMP2]], i64 1)
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint x86_fp80* [[DST]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[C]], fp128 [[TMP1]], i32 4, i64 [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = fpext x86_fp80 [[C]] to fp128
+; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], fp128 [[TMP7]], fp128 [[TMP1]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP3]] to fp128*
+; CHECK-NEXT: store fp128 [[TMP8]], fp128* [[TMP9]], align 1
+; CHECK-NEXT: store x86_fp80 [[C]], x86_fp80* [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load x86_fp80, x86_fp80* @x86_fp80_const
+ %c = fadd x86_fp80 %b, 0xK3FC9E69594BEC44DE000
+ store x86_fp80 %c, x86_fp80* %dst, align 1
+ ret void
+}
+
+define void 
@constantload_add_store_double(double* %dst) sanitize_numericalstability { +; DQQ-LABEL: @constantload_add_store_double( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[B:%.*]] = load double, double* @double_const +; DQQ-NEXT: [[TMP0:%.*]] = fpext double [[B]] to fp128 +; DQQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DQQ-NEXT: [[TMP1:%.*]] = fadd fp128 [[TMP0]], 0xL00000000000000003FFF000000000000 +; DQQ-NEXT: [[TMP2:%.*]] = bitcast double* [[DST:%.*]] to i8* +; DQQ-NEXT: [[TMP3:%.*]] = call i8* @__nsan_get_shadow_ptr_for_double_store(i8* [[TMP2]], i64 1) +; DQQ-NEXT: [[TMP4:%.*]] = ptrtoint double* [[DST]] to i64 +; DQQ-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_double_q(double [[C]], fp128 [[TMP1]], i32 4, i64 [[TMP4]]) +; DQQ-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +; DQQ-NEXT: [[TMP7:%.*]] = fpext double [[C]] to fp128 +; DQQ-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], fp128 [[TMP7]], fp128 [[TMP1]] +; DQQ-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP3]] to fp128* +; DQQ-NEXT: store fp128 [[TMP8]], fp128* [[TMP9]], align 1 +; DQQ-NEXT: store double [[C]], double* [[DST]], align 1 +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @constantload_add_store_double( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[B:%.*]] = load double, double* @double_const +; DLQ-NEXT: [[TMP0:%.*]] = fpext double [[B]] to x86_fp80 +; DLQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DLQ-NEXT: [[TMP1:%.*]] = fadd x86_fp80 [[TMP0]], 0xK3FFF8000000000000000 +; DLQ-NEXT: [[TMP2:%.*]] = bitcast double* [[DST:%.*]] to i8* +; DLQ-NEXT: [[TMP3:%.*]] = call i8* @__nsan_get_shadow_ptr_for_double_store(i8* [[TMP2]], i64 1) +; DLQ-NEXT: [[TMP4:%.*]] = ptrtoint double* [[DST]] to i64 +; DLQ-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_double_l(double [[C]], x86_fp80 [[TMP1]], i32 4, i64 [[TMP4]]) +; DLQ-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +; DLQ-NEXT: [[TMP7:%.*]] = fpext double [[C]] to x86_fp80 +; DLQ-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], x86_fp80 [[TMP7]], x86_fp80 [[TMP1]] +; DLQ-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP3]] to x86_fp80* +; DLQ-NEXT: store x86_fp80 [[TMP8]], x86_fp80* [[TMP9]], align 1 +; DLQ-NEXT: store double [[C]], double* [[DST]], align 1 +; DLQ-NEXT: ret void +; +entry: + %b = load double, double* @double_const + %c = fadd double %b, 1.0 + store double %c, double* %dst, align 1 + ret void +} + +define void @load_add_store_float(float* %a) sanitize_numericalstability { +; CHECK-LABEL: @load_add_store_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load float, float* [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP0]], i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP6:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to double* +; CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[TMP4]], align 1 +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[TMP5]], [[TMP3]] ], [ [[TMP7]], [[TMP6]] ] +; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00 +; CHECK-NEXT: [[TMP10:%.*]] = fadd double [[TMP9]], 1.000000e+00 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[A]] to i8* +; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP11]], i64 1) +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint float* [[A]] to i64 +; 
CHECK-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP10]], i32 4, i64 [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], double [[TMP16]], double [[TMP10]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP12]] to double* +; CHECK-NEXT: store double [[TMP17]], double* [[TMP18]], align 1 +; CHECK-NEXT: store float [[C]], float* [[A]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load float, float* %a, align 1 + %c = fadd float %b, 1.0 + store float %c, float* %a, align 1 + ret void +} + +define void @load_add_store_x86_fp80(x86_fp80* %a) sanitize_numericalstability { +; CHECK-LABEL: @load_add_store_x86_fp80( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load x86_fp80, x86_fp80* [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast x86_fp80* [[A]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_longdouble_load(i8* [[TMP0]], i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP6:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to fp128* +; CHECK-NEXT: [[TMP5:%.*]] = load fp128, fp128* [[TMP4]], align 1 +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = fpext x86_fp80 [[B]] to fp128 +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi fp128 [ [[TMP5]], [[TMP3]] ], [ [[TMP7]], [[TMP6]] ] +; CHECK-NEXT: [[C:%.*]] = fadd x86_fp80 [[B]], 0xK3FC9E69594BEC44DE000 +; CHECK-NEXT: [[TMP10:%.*]] = fadd fp128 [[TMP9]], 0xLC0000000000000003FC9CD2B297D889B +; CHECK-NEXT: [[TMP11:%.*]] = bitcast x86_fp80* [[A]] to i8* +; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_longdouble_store(i8* [[TMP11]], i64 1) +; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint x86_fp80* [[A]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_longdouble_q(x86_fp80 [[C]], fp128 [[TMP10]], i32 4, i64 [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = fpext x86_fp80 [[C]] to fp128 +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], fp128 [[TMP16]], fp128 [[TMP10]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP12]] to fp128* +; CHECK-NEXT: store fp128 [[TMP17]], fp128* [[TMP18]], align 1 +; CHECK-NEXT: store x86_fp80 [[C]], x86_fp80* [[A]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load x86_fp80, x86_fp80* %a, align 1 + %c = fadd x86_fp80 %b, 0xK3FC9E69594BEC44DE000 + store x86_fp80 %c, x86_fp80* %a, align 1 + ret void +} + +define void @load_add_store_double(double* %a) sanitize_numericalstability { +; DQQ-LABEL: @load_add_store_double( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[B:%.*]] = load double, double* [[A:%.*]], align 1 +; DQQ-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i8* +; DQQ-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_double_load(i8* [[TMP0]], i64 1) +; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null +; DQQ-NEXT: br i1 [[TMP2]], label [[TMP6:%.*]], label [[TMP3:%.*]] +; DQQ: 3: +; DQQ-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to fp128* +; DQQ-NEXT: [[TMP5:%.*]] = load fp128, fp128* [[TMP4]], align 1 +; DQQ-NEXT: br label [[TMP8:%.*]] +; DQQ: 6: +; DQQ-NEXT: [[TMP7:%.*]] = fpext double [[B]] to fp128 +; DQQ-NEXT: br label [[TMP8]] +; DQQ: 8: +; DQQ-NEXT: [[TMP9:%.*]] = phi fp128 [ [[TMP5]], [[TMP3]] ], [ [[TMP7]], [[TMP6]] ] +; DQQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; 
DQQ-NEXT: [[TMP10:%.*]] = fadd fp128 [[TMP9]], 0xL00000000000000003FFF000000000000 +; DQQ-NEXT: [[TMP11:%.*]] = bitcast double* [[A]] to i8* +; DQQ-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_double_store(i8* [[TMP11]], i64 1) +; DQQ-NEXT: [[TMP13:%.*]] = ptrtoint double* [[A]] to i64 +; DQQ-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_double_q(double [[C]], fp128 [[TMP10]], i32 4, i64 [[TMP13]]) +; DQQ-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +; DQQ-NEXT: [[TMP16:%.*]] = fpext double [[C]] to fp128 +; DQQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], fp128 [[TMP16]], fp128 [[TMP10]] +; DQQ-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP12]] to fp128* +; DQQ-NEXT: store fp128 [[TMP17]], fp128* [[TMP18]], align 1 +; DQQ-NEXT: store double [[C]], double* [[A]], align 1 +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @load_add_store_double( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[B:%.*]] = load double, double* [[A:%.*]], align 1 +; DLQ-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i8* +; DLQ-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_double_load(i8* [[TMP0]], i64 1) +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null +; DLQ-NEXT: br i1 [[TMP2]], label [[TMP6:%.*]], label [[TMP3:%.*]] +; DLQ: 3: +; DLQ-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to x86_fp80* +; DLQ-NEXT: [[TMP5:%.*]] = load x86_fp80, x86_fp80* [[TMP4]], align 1 +; DLQ-NEXT: br label [[TMP8:%.*]] +; DLQ: 6: +; DLQ-NEXT: [[TMP7:%.*]] = fpext double [[B]] to x86_fp80 +; DLQ-NEXT: br label [[TMP8]] +; DLQ: 8: +; DLQ-NEXT: [[TMP9:%.*]] = phi x86_fp80 [ [[TMP5]], [[TMP3]] ], [ [[TMP7]], [[TMP6]] ] +; DLQ-NEXT: [[C:%.*]] = fadd double [[B]], 1.000000e+00 +; DLQ-NEXT: [[TMP10:%.*]] = fadd x86_fp80 [[TMP9]], 0xK3FFF8000000000000000 +; DLQ-NEXT: [[TMP11:%.*]] = bitcast double* [[A]] to i8* +; DLQ-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_double_store(i8* [[TMP11]], i64 1) +; DLQ-NEXT: [[TMP13:%.*]] = ptrtoint double* [[A]] to i64 +; DLQ-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_double_l(double [[C]], x86_fp80 [[TMP10]], i32 4, i64 [[TMP13]]) +; DLQ-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +; DLQ-NEXT: [[TMP16:%.*]] = fpext double [[C]] to x86_fp80 +; DLQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], x86_fp80 [[TMP16]], x86_fp80 [[TMP10]] +; DLQ-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP12]] to x86_fp80* +; DLQ-NEXT: store x86_fp80 [[TMP17]], x86_fp80* [[TMP18]], align 1 +; DLQ-NEXT: store double [[C]], double* [[A]], align 1 +; DLQ-NEXT: ret void +; +entry: + %b = load double, double* %a, align 1 + %c = fadd double %b, 1.0 + store double %c, double* %a, align 1 + ret void +} + +define void @load_add_store_vector(<2 x float>* %a) sanitize_numericalstability { +; CHECK-LABEL: @load_add_store_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float>* [[A]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP0]], i64 2) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP6:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to <2 x double>* +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 1 +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = fpext <2 x float> [[B]] to <2 x double> +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi <2 x double> [ [[TMP5]], [[TMP3]] ], [ [[TMP7]], [[TMP6]] 
]
+; CHECK-NEXT: [[C:%.*]] = fadd <2 x float> [[B]], <float 1.000000e+00, float 1.000000e+00>
+; CHECK-NEXT: [[TMP10:%.*]] = fadd <2 x double> [[TMP9]], <double 1.000000e+00, double 1.000000e+00>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x float>* [[A]] to i8*
+; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP11]], i64 2)
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x float> [[C]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint <2 x float>* [[A]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP13]], double [[TMP14]], i32 4, i64 [[TMP15]])
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[C]], i64 1
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <2 x double> [[TMP10]], i64 1
+; CHECK-NEXT: [[TMP19:%.*]] = ptrtoint <2 x float>* [[A]] to i64
+; CHECK-NEXT: [[TMP20:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP17]], double [[TMP18]], i32 4, i64 [[TMP19]])
+; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP16]], [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP21]], 1
+; CHECK-NEXT: [[TMP23:%.*]] = fpext <2 x float> [[C]] to <2 x double>
+; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP22]], <2 x double> [[TMP23]], <2 x double> [[TMP10]]
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP12]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP24]], <2 x double>* [[TMP25]], align 1
+; CHECK-NEXT: store <2 x float> [[C]], <2 x float>* [[A]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = load <2 x float>, <2 x float>* %a, align 1
+ %c = fadd <2 x float> %b, <float 1.0, float 1.0>
+ store <2 x float> %c, <2 x float>* %a, align 1
+ ret void
+}
+
+declare float @returns_float()
+
+define void @call_fn_returning_float(float* %dst) sanitize_numericalstability {
+; CHECK-LABEL: @call_fn_returning_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = call float @returns_float()
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float ()* @returns_float to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[B]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: [[C:%.*]] = fadd float [[B]], 1.000000e+00
+; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[DST:%.*]] to i8*
+; CHECK-NEXT: [[TMP7:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP6]], i64 1)
+; CHECK-NEXT: [[TMP8:%.*]] = ptrtoint float* [[DST]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__nsan_internal_check_float_d(float [[C]], double [[TMP5]], i32 4, i64 [[TMP8]])
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1
+; CHECK-NEXT: [[TMP11:%.*]] = fpext float [[C]] to double
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], double [[TMP11]], double [[TMP5]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP7]] to double*
+; CHECK-NEXT: store double [[TMP12]], double* [[TMP13]], align 1
+; CHECK-NEXT: store float [[C]], float* [[DST]], align 1
+; CHECK-NEXT: ret void
+;
+entry:
+ %b = call float @returns_float()
+ %c = fadd float %b, 1.0
+ store float %c, float* %dst, align 1
+ ret void
+}
+
+define float @return_fn_returning_float(float* %dst) sanitize_numericalstability {
+; CHECK-LABEL: @return_fn_returning_float(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[B:%.*]] = call float @returns_float()
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, 
i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float ()* @returns_float to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP4]], i32 1, i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], double [[TMP7]], double [[TMP4]] +; CHECK-NEXT: store i64 ptrtoint (float (float*)* @return_fn_returning_float to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP8]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[B]] +; +entry: + %b = call float @returns_float() + ret float %b +} + +declare void @takes_floats(float %a, i8 %b, double %c, x86_fp80 %d) + +define void @call_fn_taking_float() sanitize_numericalstability { +; DQQ-LABEL: @call_fn_taking_float( +; DQQ-NEXT: entry: +; DQQ-NEXT: store i64 ptrtoint (void (float, i8, double, x86_fp80)* @takes_floats to i64), i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: store double 1.000000e+00, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; DQQ-NEXT: store fp128 0xL00000000000000004000800000000000, fp128* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 8) to fp128*), align 1 +; DQQ-NEXT: store fp128 0xLC0000000000000003FC9CD2B297D889B, fp128* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 24) to fp128*), align 1 +; DQQ-NEXT: call void @takes_floats(float 1.000000e+00, i8 2, double 3.000000e+00, x86_fp80 0xK3FC9E69594BEC44DE000) +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @call_fn_taking_float( +; DLQ-NEXT: entry: +; DLQ-NEXT: store i64 ptrtoint (void (float, i8, double, x86_fp80)* @takes_floats to i64), i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: store double 1.000000e+00, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; DLQ-NEXT: store x86_fp80 0xK4000C000000000000000, x86_fp80* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 8) to x86_fp80*), align 1 +; DLQ-NEXT: store fp128 0xLC0000000000000003FC9CD2B297D889B, fp128* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 18) to fp128*), align 1 +; DLQ-NEXT: call void @takes_floats(float 1.000000e+00, i8 2, double 3.000000e+00, x86_fp80 0xK3FC9E69594BEC44DE000) +; DLQ-NEXT: ret void +; +entry: + call void @takes_floats(float 1.0, i8 2, double 3.0, x86_fp80 0xK3FC9E69594BEC44DE000) + ret void +} + +declare float @llvm.sin.f32(float) readnone + +define float @call_sin_intrinsic() sanitize_numericalstability { +; CHECK-LABEL: @call_sin_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call float @llvm.sin.f32(float 1.000000e+00) +; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.sin.f64(double 1.000000e+00) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP0]], i32 1, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double 
[[TMP3]], double [[TMP0]] +; CHECK-NEXT: store i64 ptrtoint (float ()* @call_sin_intrinsic to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP4]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @llvm.sin.f32(float 1.0) + ret float %r +} + +declare float @sinf(float) + +define float @call_sinf_libfunc() sanitize_numericalstability { +; CHECK-LABEL: @call_sinf_libfunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[R:%.*]] = call float @sinf(float 1.000000e+00) #4 +; CHECK-NEXT: [[TMP0:%.*]] = call double @llvm.sin.f64(double 1.000000e+00) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP0]], i32 1, i64 0) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP0]] +; CHECK-NEXT: store i64 ptrtoint (float ()* @call_sinf_libfunc to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP4]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float @sinf(float 1.0) + ret float %r +} + +declare double @sin(double) + +; FIXME: nsan uses `sin(double)` for fp128. +define double @call_sin_libfunc() sanitize_numericalstability { +; DQQ-LABEL: @call_sin_libfunc( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[R:%.*]] = call double @sin(double 1.000000e+00) #4 +; DQQ-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.sin.f80(x86_fp80 0xK3FFF8000000000000000) +; DQQ-NEXT: [[TMP1:%.*]] = fpext x86_fp80 [[TMP0]] to fp128 +; DQQ-NEXT: [[TMP2:%.*]] = call i32 @__nsan_internal_check_double_q(double [[R]], fp128 [[TMP1]], i32 1, i64 0) +; DQQ-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 1 +; DQQ-NEXT: [[TMP4:%.*]] = fpext double [[R]] to fp128 +; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], fp128 [[TMP4]], fp128 [[TMP1]] +; DQQ-NEXT: store i64 ptrtoint (double ()* @call_sin_libfunc to i64), i64* @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: store fp128 [[TMP5]], fp128* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to fp128*), align 16 +; DQQ-NEXT: ret double [[R]] +; +; DLQ-LABEL: @call_sin_libfunc( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[R:%.*]] = call double @sin(double 1.000000e+00) #4 +; DLQ-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.sin.f80(x86_fp80 0xK3FFF8000000000000000) +; DLQ-NEXT: [[TMP1:%.*]] = call i32 @__nsan_internal_check_double_l(double [[R]], x86_fp80 [[TMP0]], i32 1, i64 0) +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[R]] to x86_fp80 +; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], x86_fp80 [[TMP3]], x86_fp80 [[TMP0]] +; DLQ-NEXT: store i64 ptrtoint (double ()* @call_sin_libfunc to i64), i64* @__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: store x86_fp80 [[TMP4]], x86_fp80* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to x86_fp80*), align 16 +; DLQ-NEXT: ret double [[R]] +; +entry: + %r = call double @sin(double 1.0) + ret double %r +} + +declare double @frexp(double, i32*) + +define double @call_frexp_libfunc_nointrinsic(double %0, i32* nocapture %1) sanitize_numericalstability { +; DQQ-LABEL: @call_frexp_libfunc_nointrinsic( +; DQQ-NEXT: [[TMP3:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], ptrtoint (double (double, i32*)* @call_frexp_libfunc_nointrinsic to i64) +; DQQ-NEXT: [[TMP5:%.*]] = load fp128, fp128* bitcast ([16384 x i8]* 
@__nsan_shadow_args_ptr to fp128*), align 1 +; DQQ-NEXT: [[TMP6:%.*]] = fpext double [[TMP0:%.*]] to fp128 +; DQQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], fp128 [[TMP5]], fp128 [[TMP6]] +; DQQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_double_q(double [[TMP0]], fp128 [[TMP7]], i32 2, i64 0) +; DQQ-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; DQQ-NEXT: [[TMP10:%.*]] = fpext double [[TMP0]] to fp128 +; DQQ-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], fp128 [[TMP10]], fp128 [[TMP7]] +; DQQ-NEXT: [[TMP12:%.*]] = tail call double @frexp(double [[TMP0]], i32* [[TMP1:%.*]]) +; DQQ-NEXT: [[TMP13:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], ptrtoint (double (double, i32*)* @frexp to i64) +; DQQ-NEXT: [[TMP15:%.*]] = load fp128, fp128* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to fp128*), align 16 +; DQQ-NEXT: [[TMP16:%.*]] = fpext double [[TMP12]] to fp128 +; DQQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], fp128 [[TMP15]], fp128 [[TMP16]] +; DQQ-NEXT: [[TMP18:%.*]] = call i32 @__nsan_internal_check_double_q(double [[TMP12]], fp128 [[TMP17]], i32 1, i64 0) +; DQQ-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1 +; DQQ-NEXT: [[TMP20:%.*]] = fpext double [[TMP12]] to fp128 +; DQQ-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], fp128 [[TMP20]], fp128 [[TMP17]] +; DQQ-NEXT: store i64 ptrtoint (double (double, i32*)* @call_frexp_libfunc_nointrinsic to i64), i64* @__nsan_shadow_ret_tag, align 8 +; DQQ-NEXT: store fp128 [[TMP21]], fp128* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to fp128*), align 16 +; DQQ-NEXT: ret double [[TMP12]] +; +; DLQ-LABEL: @call_frexp_libfunc_nointrinsic( +; DLQ-NEXT: [[TMP3:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP4:%.*]] = icmp eq i64 [[TMP3]], ptrtoint (double (double, i32*)* @call_frexp_libfunc_nointrinsic to i64) +; DLQ-NEXT: [[TMP5:%.*]] = load x86_fp80, x86_fp80* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to x86_fp80*), align 1 +; DLQ-NEXT: [[TMP6:%.*]] = fpext double [[TMP0:%.*]] to x86_fp80 +; DLQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP4]], x86_fp80 [[TMP5]], x86_fp80 [[TMP6]] +; DLQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_double_l(double [[TMP0]], x86_fp80 [[TMP7]], i32 2, i64 0) +; DLQ-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; DLQ-NEXT: [[TMP10:%.*]] = fpext double [[TMP0]] to x86_fp80 +; DLQ-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], x86_fp80 [[TMP10]], x86_fp80 [[TMP7]] +; DLQ-NEXT: [[TMP12:%.*]] = tail call double @frexp(double [[TMP0]], i32* [[TMP1:%.*]]) +; DLQ-NEXT: [[TMP13:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], ptrtoint (double (double, i32*)* @frexp to i64) +; DLQ-NEXT: [[TMP15:%.*]] = load x86_fp80, x86_fp80* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to x86_fp80*), align 16 +; DLQ-NEXT: [[TMP16:%.*]] = fpext double [[TMP12]] to x86_fp80 +; DLQ-NEXT: [[TMP17:%.*]] = select i1 [[TMP14]], x86_fp80 [[TMP15]], x86_fp80 [[TMP16]] +; DLQ-NEXT: [[TMP18:%.*]] = call i32 @__nsan_internal_check_double_l(double [[TMP12]], x86_fp80 [[TMP17]], i32 1, i64 0) +; DLQ-NEXT: [[TMP19:%.*]] = icmp eq i32 [[TMP18]], 1 +; DLQ-NEXT: [[TMP20:%.*]] = fpext double [[TMP12]] to x86_fp80 +; DLQ-NEXT: [[TMP21:%.*]] = select i1 [[TMP19]], x86_fp80 [[TMP20]], x86_fp80 [[TMP17]] +; DLQ-NEXT: store i64 ptrtoint (double (double, i32*)* @call_frexp_libfunc_nointrinsic to i64), i64* 
@__nsan_shadow_ret_tag, align 8 +; DLQ-NEXT: store x86_fp80 [[TMP21]], x86_fp80* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to x86_fp80*), align 16 +; DLQ-NEXT: ret double [[TMP12]] +; + %3 = tail call double @frexp(double %0, i32* %1) + ret double %3 +} + +define float @call_fn_taking_float_by_fn_ptr(float (float)* nocapture %fn_ptr) sanitize_numericalstability { +; CHECK-LABEL: @call_fn_taking_float_by_fn_ptr( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint float (float)* [[FN_PTR:%.*]] to i64 +; CHECK-NEXT: store i64 [[TMP0]], i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: store double 1.000000e+00, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[R:%.*]] = call float [[FN_PTR]](float 1.000000e+00) +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint float (float)* [[FN_PTR]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = load double, double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], double [[TMP4]], double [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: store i64 ptrtoint (float (float (float)*)* @call_fn_taking_float_by_fn_ptr to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP10]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %r = call float %fn_ptr(float 1.0) + ret float %r +} + +define void @store_float(float* %dst) sanitize_numericalstability { +; CHECK-LABEL: @store_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP0]], i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to double* +; CHECK-NEXT: store double 4.200000e+01, double* [[TMP2]], align 1 +; CHECK-NEXT: store float 4.200000e+01, float* [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + store float 42.0, float* %dst, align 1 + ret void +} + +define void @store_non_float(i32* %dst) sanitize_numericalstability { +; CHECK-LABEL: @store_non_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 42, i32* [[DST:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[DST]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[DST]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP1]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to double* +; CHECK-NEXT: store double 0x36F5000000000000, double* [[TMP3]], align 1 +; CHECK-NEXT: ret void +; +entry: + store i32 42, i32* %dst, align 1 + ret void +} + +define i1 @inline_asm(double %0) sanitize_numericalstability { +; DQQ-LABEL: @inline_asm( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (i1 (double)* @inline_asm to i64) +; DQQ-NEXT: [[TMP3:%.*]] = load fp128, fp128* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to fp128*), align 1 +; DQQ-NEXT: [[TMP4:%.*]] = fpext double [[TMP0:%.*]] to fp128 +; 
DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], fp128 [[TMP3]], fp128 [[TMP4]] +; DQQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP6:%.*]] = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double [[TMP0]]) +; DQQ-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; DQQ-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 0 +; DQQ-NEXT: ret i1 [[TMP8]] +; +; DLQ-LABEL: @inline_asm( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (i1 (double)* @inline_asm to i64) +; DLQ-NEXT: [[TMP3:%.*]] = load x86_fp80, x86_fp80* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to x86_fp80*), align 1 +; DLQ-NEXT: [[TMP4:%.*]] = fpext double [[TMP0:%.*]] to x86_fp80 +; DLQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], x86_fp80 [[TMP3]], x86_fp80 [[TMP4]] +; DLQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP6:%.*]] = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double [[TMP0]]) +; DLQ-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i8 +; DLQ-NEXT: [[TMP8:%.*]] = icmp slt i8 [[TMP7]], 0 +; DLQ-NEXT: ret i1 [[TMP8]] +; +entry: + %1 = call i32 asm "pmovmskb $1, $0", "=r,x,~{dirflag},~{fpsr},~{flags}"(double %0) + %2 = trunc i32 %1 to i8 + %3 = icmp slt i8 %2, 0 + ret i1 %3 +} + +define void @vector_extract(<2 x float> %0) sanitize_numericalstability { +; CHECK-LABEL: @vector_extract( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (void (<2 x float>)* @vector_extract to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <2 x double>*), align 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +; CHECK-NEXT: ret void +; +entry: + %1 = extractelement <2 x float> %0, i32 1 + ret void +} + +define void @vector_insert(<2 x float> %0) sanitize_numericalstability { +; CHECK-LABEL: @vector_insert( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (void (<2 x float>)* @vector_insert to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <2 x double>*), align 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP0]], float 1.000000e+00, i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP5]], double 1.000000e+00, i32 1 +; CHECK-NEXT: ret void +; +entry: + %1 = insertelement <2 x float> %0, float 1.0, i32 1 + ret void +} + + +define void @vector_shuffle(<2 x float> %0) sanitize_numericalstability { +; CHECK-LABEL: @vector_shuffle( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (void (<2 x float>)* 
@vector_shuffle to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <2 x double>*), align 1 +; CHECK-NEXT: [[TMP4:%.*]] = fpext <2 x float> [[TMP0:%.*]] to <2 x double> +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x double> [[TMP3]], <2 x double> [[TMP4]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP5]], <2 x double> , <2 x i32> +; CHECK-NEXT: ret void +; +entry: + %1 = shufflevector <2 x float> %0, <2 x float> , <2 x i32> + ret void +} + +define void @aggregate_extract({i32, {float, i1}} %0) sanitize_numericalstability { +; CHECK-LABEL: @aggregate_extract( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, { float, i1 } } [[TMP0:%.*]], 1, 0 +; CHECK-NEXT: [[TMP2:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: ret void +; +entry: + %1 = extractvalue {i32, {float, i1}} %0, 1, 0 + ret void +} + +define void @aggregate_insert({i32, {float, i1}} %0, float %1) sanitize_numericalstability { +; CHECK-LABEL: @aggregate_insert( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[TMP2]], ptrtoint (void ({ i32, { float, i1 } }, float)* @aggregate_insert to i64) +; CHECK-NEXT: [[TMP4:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[TMP5:%.*]] = fpext float [[TMP1:%.*]] to double +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP3]], double [[TMP4]], double [[TMP5]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP1]], double [[TMP6]], i32 5, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[TMP1]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { i32, { float, i1 } } [[TMP0:%.*]], float [[TMP1]], 1, 0 +; CHECK-NEXT: ret void +; +entry: + %2 = insertvalue {i32, {float, i1}} %0, float %1, 1, 0 + ret void +} + +define void @aggregate_insert_avoid_const_check({i32, {float, i1}} %0) sanitize_numericalstability { +; CHECK-LABEL: @aggregate_insert_avoid_const_check( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { i32, { float, i1 } } [[TMP0:%.*]], float 1.000000e+00, 1, 0 +; CHECK-NEXT: ret void +; +entry: + %1 = insertvalue {i32, {float, i1}} %0, float 1.0, 1, 0 + ret void +} + + +declare float @fabsf(float) + +define float @sub_fabs(float %a, float %b) sanitize_numericalstability { +; CHECK-LABEL: @sub_fabs( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float (float, float)* @sub_fabs to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load double, double* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 8) to double*), align 1 +; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[B:%.*]] to double +; CHECK-NEXT: [[TMP7:%.*]] = 
select i1 [[TMP1]], double [[TMP5]], double [[TMP6]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[S:%.*]] = fsub float [[A]], [[B]] +; CHECK-NEXT: [[TMP8:%.*]] = fsub double [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call i32 @__nsan_internal_check_float_d(float [[S]], double [[TMP8]], i32 2, i64 0) +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 1 +; CHECK-NEXT: [[TMP11:%.*]] = fpext float [[S]] to double +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP10]], double [[TMP11]], double [[TMP8]] +; CHECK-NEXT: [[R:%.*]] = call float @fabsf(float [[S]]) #4 +; CHECK-NEXT: [[TMP13:%.*]] = call double @llvm.fabs.f64(double [[TMP8]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP13]], i32 1, i64 0) +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 +; CHECK-NEXT: [[TMP16:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], double [[TMP16]], double [[TMP13]] +; CHECK-NEXT: store i64 ptrtoint (float (float, float)* @sub_fabs to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP17]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; +entry: + %s = fsub float %a, %b + %r = call float @fabsf(float %s) + ret float %r +} + +; Note that the `unsafe-fp-math` from the function attributes should be moved to +; individual instructions, with the shadow instructions NOT getting the attribute. +define float @param_add_return_float_unsafe_fp_math(float %a) #0 { +; CHECK-LABEL: @param_add_return_float_unsafe_fp_math( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float (float)* @param_add_return_float_unsafe_fp_math to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[B:%.*]] = fadd fast float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP5:%.*]] = fadd double [[TMP4]], 1.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__nsan_internal_check_float_d(float [[B]], double [[TMP5]], i32 1, i64 0) +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[B]] to double +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP7]], double [[TMP8]], double [[TMP5]] +; CHECK-NEXT: store i64 ptrtoint (float (float)* @param_add_return_float_unsafe_fp_math to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP9]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[B]] +; +entry: + %b = fadd float %a, 1.0 + ret float %b +} + + +define void @truncate(<2 x double> %0) sanitize_numericalstability { +; DQQ-LABEL: @truncate( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (void (<2 x double>)* @truncate to i64) +; DQQ-NEXT: [[TMP3:%.*]] = load <2 x fp128>, <2 x fp128>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <2 x fp128>*), align 1 +; DQQ-NEXT: [[TMP4:%.*]] = fpext <2 x double> [[TMP0:%.*]] to <2 x fp128> +; DQQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x fp128> [[TMP3]], <2 x fp128> [[TMP4]] +; 
DQQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP6:%.*]] = fptrunc <2 x double> [[TMP0]] to <2 x float> +; DQQ-NEXT: [[TMP7:%.*]] = fptrunc <2 x fp128> [[TMP5]] to <2 x double> +; DQQ-NEXT: ret void +; +; DLQ-LABEL: @truncate( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (void (<2 x double>)* @truncate to i64) +; DLQ-NEXT: [[TMP3:%.*]] = load <2 x x86_fp80>, <2 x x86_fp80>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <2 x x86_fp80>*), align 1 +; DLQ-NEXT: [[TMP4:%.*]] = fpext <2 x double> [[TMP0:%.*]] to <2 x x86_fp80> +; DLQ-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], <2 x x86_fp80> [[TMP3]], <2 x x86_fp80> [[TMP4]] +; DLQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP6:%.*]] = fptrunc <2 x double> [[TMP0]] to <2 x float> +; DLQ-NEXT: [[TMP7:%.*]] = fptrunc <2 x x86_fp80> [[TMP5]] to <2 x double> +; DLQ-NEXT: ret void +; +entry: + %1 = fptrunc <2 x double> %0 to <2 x float> + ret void +} + +define void @unaryop(float %a) sanitize_numericalstability { +; CHECK-LABEL: @unaryop( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (void (float)* @unaryop to i64) +; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1 +; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]] +; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; CHECK-NEXT: [[C:%.*]] = fneg float [[A]] +; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]] +; CHECK-NEXT: ret void +; +entry: + %c = fneg float %a + ret void +} + + +attributes #0 = { nounwind readonly uwtable sanitize_numericalstability "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } + diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/cfg.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -nsan -nsan-shadow-type-mapping=dqq -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests with more involved control flow to check lazy construction of the +; shadow values. 
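+; A hand-written sketch of the shape the pass has to produce for these tests
+; (illustrative only, carries no CHECK lines, and is left uninstrumented since
+; it lacks the sanitize_numericalstability attribute): the shadow phi %s.a and
+; the shadow fadd %s.d reference each other across blocks, so one of them has
+; to be created as a placeholder and patched once the other exists -- hence the
+; lazy construction. The %s.* names are invented for the sketch.
+define float @lazy_shadow_sketch() {
+entry:
+  br label %block1
+
+loop:
+  %d = fadd float %b, 2.0
+  %s.d = fadd double %s.b, 2.0   ; shadow of %d, needs shadow(%b) from below
+  br label %block1
+
+block1:
+  %a = phi float [ %d, %loop ], [ 1.0, %entry ]
+  %s.a = phi double [ %s.d, %loop ], [ 1.0, %entry ]
+  %b = fadd float %a, 1.0
+  %s.b = fadd double %s.a, 1.0
+  br label %loop
+}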
+ +define float @forward_use() sanitize_numericalstability { +; CHECK-LABEL: @forward_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP2:%.*]], 2.000000e+00 +; CHECK-NEXT: br label [[BLOCK1]] +; CHECK: block1: +; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[B]] = fadd float [[A]], 1.000000e+00 +; CHECK-NEXT: [[TMP2]] = fadd double [[TMP1]], 1.000000e+00 +; CHECK-NEXT: br label [[LOOP]] +; + +entry: + br label %block1 + +loop: + %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available. + br label %block1 + +block1: + %a = phi float [ %d, %loop], [ 1.0, %entry ] + %b = fadd float %a, 1.0 + br label %loop +} + +define float @forward_use_with_load(float* %p) sanitize_numericalstability { +; CHECK-LABEL: @forward_use_with_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00 +; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP12:%.*]], 2.000000e+00 +; CHECK-NEXT: br label [[BLOCK1]] +; CHECK: block1: +; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ] +; CHECK-NEXT: [[L:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[P]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP2]], i64 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i8* [[TMP3]], null +; CHECK-NEXT: br i1 [[TMP4]], label [[TMP8:%.*]], label [[TMP5:%.*]] +; CHECK: 5: +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP3]] to double* +; CHECK-NEXT: [[TMP7:%.*]] = load double, double* [[TMP6]], align 1 +; CHECK-NEXT: br label [[TMP10:%.*]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[L]] to double +; CHECK-NEXT: br label [[TMP10]] +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = phi double [ [[TMP7]], [[TMP5]] ], [ [[TMP9]], [[TMP8]] ] +; CHECK-NEXT: [[B]] = fadd float [[L]], 1.000000e+00 +; CHECK-NEXT: [[TMP12]] = fadd double [[TMP11]], 1.000000e+00 +; CHECK-NEXT: br label [[LOOP]] +; + +entry: + br label %block1 + +loop: + %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available. + br label %block1 + +block1: + %a = phi float [ %d, %loop], [ 1.0, %entry ] + %l = load float, float* %p ; the load creates a new block + %b = fadd float %l, 1.0 ; this requires shadow(%l). 
+ br label %loop
+}
+
+define float @forward_use_with_two_uses() sanitize_numericalstability {
+; CHECK-LABEL: @forward_use_with_two_uses(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[BLOCK1:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[D:%.*]] = fadd float [[B:%.*]], 2.000000e+00
+; CHECK-NEXT: [[TMP0:%.*]] = fadd double [[TMP4:%.*]], 2.000000e+00
+; CHECK-NEXT: br label [[BLOCK1]]
+; CHECK: block1:
+; CHECK-NEXT: [[A:%.*]] = phi float [ [[D]], [[LOOP:%.*]] ], [ 1.000000e+00, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = phi double [ [[TMP0]], [[LOOP]] ], [ 1.000000e+00, [[ENTRY]] ]
+; CHECK-NEXT: [[T1:%.*]] = fadd float [[A]], 1.000000e+00
+; CHECK-NEXT: [[TMP2:%.*]] = fadd double [[TMP1]], 1.000000e+00
+; CHECK-NEXT: [[T2:%.*]] = fadd float [[T1]], 3.000000e+00
+; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP2]], 3.000000e+00
+; CHECK-NEXT: [[B]] = fadd float [[T1]], [[T2]]
+; CHECK-NEXT: [[TMP4]] = fadd double [[TMP2]], [[TMP3]]
+; CHECK-NEXT: br label [[LOOP]]
+;
+
+entry:
+ br label %block1
+
+loop:
+ %d = fadd float %b, 2.0 ; this is a forward reference, requiring shadow(%b) to be available.
+ br label %block1
+
+block1:
+ %a = phi float [ %d, %loop], [ 1.0, %entry ]
+ %t1 = fadd float %a, 1.0
+ %t2 = fadd float %t1, 3.0 ; this requires shadow(%t1)
+ %b = fadd float %t1, %t2 ; this requires shadow(%t2) and shadow(%t1).
+ br label %loop
+}
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/fcmp.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/fcmp.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/fcmp.ll
@@ -0,0 +1,194 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -nsan -nsan-shadow-type-mapping=dqq -nsan-truncate-fcmp-eq=false -S | FileCheck %s --check-prefixes=CHECK,DQQ
+; RUN: opt < %s -nsan -nsan-shadow-type-mapping=dlq -nsan-truncate-fcmp-eq=false -S | FileCheck %s --check-prefixes=CHECK,DLQ
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; Scalar floating-point comparison: `a == 1.0`.
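+; A hand-written sketch of the check shape the pass emits for an fcmp
+; (illustrative only, no CHECK lines; the declaration below is normally
+; inserted by the pass itself, and the function name is invented). The fcmp is
+; duplicated with the same predicate on the shadow type, and a disagreement
+; between the two results is reported via __nsan_fcmp_fail_*. The fp128
+; constant is 1.0, as in the DQQ checks below.
+declare void @__nsan_fcmp_fail_double_q(double, double, fp128, fp128, i32, i1, i1)
+
+define i1 @fcmp_check_sketch(double %a, fp128 %shadow_a) {
+entry:
+  %r = fcmp oeq double %a, 1.0
+  %sr = fcmp oeq fp128 %shadow_a, 0xL00000000000000003FFF000000000000
+  %same = icmp eq i1 %r, %sr
+  br i1 %same, label %cont, label %fail
+
+fail:
+  call void @__nsan_fcmp_fail_double_q(double %a, double 1.0, fp128 %shadow_a, fp128 0xL00000000000000003FFF000000000000, i32 1, i1 %r, i1 %sr)
+  br label %cont
+
+cont:
+  ret i1 %r
+}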
+define i1 @scalar_fcmp(double %a) sanitize_numericalstability { +; DQQ-LABEL: @scalar_fcmp( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (i1 (double)* @scalar_fcmp to i64) +; DQQ-NEXT: [[TMP2:%.*]] = load fp128, fp128* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to fp128*), align 1 +; DQQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to fp128 +; DQQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], fp128 [[TMP2]], fp128 [[TMP3]] +; DQQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[R:%.*]] = fcmp oeq double [[A]], 1.000000e+00 +; DQQ-NEXT: [[TMP5:%.*]] = fcmp oeq fp128 [[TMP4]], 0xL00000000000000003FFF000000000000 +; DQQ-NEXT: [[TMP6:%.*]] = icmp eq i1 [[R]], [[TMP5]] +; DQQ-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] +; DQQ: 7: +; DQQ-NEXT: call void @__nsan_fcmp_fail_double_q(double [[A]], double 1.000000e+00, fp128 [[TMP4]], fp128 0xL00000000000000003FFF000000000000, i32 1, i1 [[R]], i1 [[TMP5]]) +; DQQ-NEXT: br label [[TMP8]] +; DQQ: 8: +; DQQ-NEXT: ret i1 [[R]] +; +; DLQ-LABEL: @scalar_fcmp( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (i1 (double)* @scalar_fcmp to i64) +; DLQ-NEXT: [[TMP2:%.*]] = load x86_fp80, x86_fp80* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to x86_fp80*), align 1 +; DLQ-NEXT: [[TMP3:%.*]] = fpext double [[A:%.*]] to x86_fp80 +; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], x86_fp80 [[TMP2]], x86_fp80 [[TMP3]] +; DLQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[R:%.*]] = fcmp oeq double [[A]], 1.000000e+00 +; DLQ-NEXT: [[TMP5:%.*]] = fcmp oeq x86_fp80 [[TMP4]], 0xK3FFF8000000000000000 +; DLQ-NEXT: [[TMP6:%.*]] = icmp eq i1 [[R]], [[TMP5]] +; DLQ-NEXT: br i1 [[TMP6]], label [[TMP8:%.*]], label [[TMP7:%.*]] +; DLQ: 7: +; DLQ-NEXT: call void @__nsan_fcmp_fail_double_l(double [[A]], double 1.000000e+00, x86_fp80 [[TMP4]], x86_fp80 0xK3FFF8000000000000000, i32 1, i1 [[R]], i1 [[TMP5]]) +; DLQ-NEXT: br label [[TMP8]] +; DLQ: 8: +; DLQ-NEXT: ret i1 [[R]] +; +entry: + %r = fcmp oeq double %a, 1.0 + ret i1 %r +} + +; Vector float comparison. 
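+; Hand-written sketch (illustrative only, no CHECK lines; the function name is
+; invented): for vectors, the per-lane agreement bits are folded with
+; llvm.vector.reduce.and so the common all-lanes-agree case costs a single
+; branch; only on a mismatch does the pass extract the lanes one by one and
+; report each through __nsan_fcmp_fail_*, as verified below.
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+
+define i1 @vector_fcmp_reduce_sketch(<4 x i1> %r, <4 x i1> %shadow_r) {
+entry:
+  %same = icmp eq <4 x i1> %r, %shadow_r
+  %all = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %same)
+  ret i1 %all
+}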
+define <4 x i1> @vector_fcmp(<4 x double> %a, <4 x double> %b) sanitize_numericalstability { +; DQQ-LABEL: @vector_fcmp( +; DQQ-NEXT: entry: +; DQQ-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (<4 x i1> (<4 x double>, <4 x double>)* @vector_fcmp to i64) +; DQQ-NEXT: [[TMP2:%.*]] = load <4 x fp128>, <4 x fp128>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <4 x fp128>*), align 1 +; DQQ-NEXT: [[TMP3:%.*]] = fpext <4 x double> [[A:%.*]] to <4 x fp128> +; DQQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <4 x fp128> [[TMP2]], <4 x fp128> [[TMP3]] +; DQQ-NEXT: [[TMP5:%.*]] = load <4 x fp128>, <4 x fp128>* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 64) to <4 x fp128>*), align 1 +; DQQ-NEXT: [[TMP6:%.*]] = fpext <4 x double> [[B:%.*]] to <4 x fp128> +; DQQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], <4 x fp128> [[TMP5]], <4 x fp128> [[TMP6]] +; DQQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DQQ-NEXT: [[R:%.*]] = fcmp oeq <4 x double> [[A]], [[B]] +; DQQ-NEXT: [[TMP8:%.*]] = fcmp oeq <4 x fp128> [[TMP4]], [[TMP7]] +; DQQ-NEXT: [[TMP9:%.*]] = icmp eq <4 x i1> [[R]], [[TMP8]] +; DQQ-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]]) +; DQQ-NEXT: br i1 [[TMP10]], label [[TMP36:%.*]], label [[TMP11:%.*]] +; DQQ: 11: +; DQQ-NEXT: [[TMP12:%.*]] = extractelement <4 x double> [[A]], i64 0 +; DQQ-NEXT: [[TMP13:%.*]] = extractelement <4 x double> [[B]], i64 0 +; DQQ-NEXT: [[TMP14:%.*]] = extractelement <4 x fp128> [[TMP4]], i64 0 +; DQQ-NEXT: [[TMP15:%.*]] = extractelement <4 x fp128> [[TMP7]], i64 0 +; DQQ-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[R]], i64 0 +; DQQ-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP8]], i64 0 +; DQQ-NEXT: call void @__nsan_fcmp_fail_double_q(double [[TMP12]], double [[TMP13]], fp128 [[TMP14]], fp128 [[TMP15]], i32 1, i1 [[TMP16]], i1 [[TMP17]]) +; DQQ-NEXT: [[TMP18:%.*]] = extractelement <4 x double> [[A]], i64 1 +; DQQ-NEXT: [[TMP19:%.*]] = extractelement <4 x double> [[B]], i64 1 +; DQQ-NEXT: [[TMP20:%.*]] = extractelement <4 x fp128> [[TMP4]], i64 1 +; DQQ-NEXT: [[TMP21:%.*]] = extractelement <4 x fp128> [[TMP7]], i64 1 +; DQQ-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[R]], i64 1 +; DQQ-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 1 +; DQQ-NEXT: call void @__nsan_fcmp_fail_double_q(double [[TMP18]], double [[TMP19]], fp128 [[TMP20]], fp128 [[TMP21]], i32 1, i1 [[TMP22]], i1 [[TMP23]]) +; DQQ-NEXT: [[TMP24:%.*]] = extractelement <4 x double> [[A]], i64 2 +; DQQ-NEXT: [[TMP25:%.*]] = extractelement <4 x double> [[B]], i64 2 +; DQQ-NEXT: [[TMP26:%.*]] = extractelement <4 x fp128> [[TMP4]], i64 2 +; DQQ-NEXT: [[TMP27:%.*]] = extractelement <4 x fp128> [[TMP7]], i64 2 +; DQQ-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[R]], i64 2 +; DQQ-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP8]], i64 2 +; DQQ-NEXT: call void @__nsan_fcmp_fail_double_q(double [[TMP24]], double [[TMP25]], fp128 [[TMP26]], fp128 [[TMP27]], i32 1, i1 [[TMP28]], i1 [[TMP29]]) +; DQQ-NEXT: [[TMP30:%.*]] = extractelement <4 x double> [[A]], i64 3 +; DQQ-NEXT: [[TMP31:%.*]] = extractelement <4 x double> [[B]], i64 3 +; DQQ-NEXT: [[TMP32:%.*]] = extractelement <4 x fp128> [[TMP4]], i64 3 +; DQQ-NEXT: [[TMP33:%.*]] = extractelement <4 x fp128> [[TMP7]], i64 3 +; DQQ-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[R]], i64 3 +; DQQ-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP8]], i64 3 +; 
DQQ-NEXT: call void @__nsan_fcmp_fail_double_q(double [[TMP30]], double [[TMP31]], fp128 [[TMP32]], fp128 [[TMP33]], i32 1, i1 [[TMP34]], i1 [[TMP35]]) +; DQQ-NEXT: br label [[TMP36]] +; DQQ: 36: +; DQQ-NEXT: ret <4 x i1> [[R]] +; +; DLQ-LABEL: @vector_fcmp( +; DLQ-NEXT: entry: +; DLQ-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (<4 x i1> (<4 x double>, <4 x double>)* @vector_fcmp to i64) +; DLQ-NEXT: [[TMP2:%.*]] = load <4 x x86_fp80>, <4 x x86_fp80>* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to <4 x x86_fp80>*), align 1 +; DLQ-NEXT: [[TMP3:%.*]] = fpext <4 x double> [[A:%.*]] to <4 x x86_fp80> +; DLQ-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], <4 x x86_fp80> [[TMP2]], <4 x x86_fp80> [[TMP3]] +; DLQ-NEXT: [[TMP5:%.*]] = load <4 x x86_fp80>, <4 x x86_fp80>* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 40) to <4 x x86_fp80>*), align 1 +; DLQ-NEXT: [[TMP6:%.*]] = fpext <4 x double> [[B:%.*]] to <4 x x86_fp80> +; DLQ-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], <4 x x86_fp80> [[TMP5]], <4 x x86_fp80> [[TMP6]] +; DLQ-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8 +; DLQ-NEXT: [[R:%.*]] = fcmp oeq <4 x double> [[A]], [[B]] +; DLQ-NEXT: [[TMP8:%.*]] = fcmp oeq <4 x x86_fp80> [[TMP4]], [[TMP7]] +; DLQ-NEXT: [[TMP9:%.*]] = icmp eq <4 x i1> [[R]], [[TMP8]] +; DLQ-NEXT: [[TMP10:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP9]]) +; DLQ-NEXT: br i1 [[TMP10]], label [[TMP36:%.*]], label [[TMP11:%.*]] +; DLQ: 11: +; DLQ-NEXT: [[TMP12:%.*]] = extractelement <4 x double> [[A]], i64 0 +; DLQ-NEXT: [[TMP13:%.*]] = extractelement <4 x double> [[B]], i64 0 +; DLQ-NEXT: [[TMP14:%.*]] = extractelement <4 x x86_fp80> [[TMP4]], i64 0 +; DLQ-NEXT: [[TMP15:%.*]] = extractelement <4 x x86_fp80> [[TMP7]], i64 0 +; DLQ-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[R]], i64 0 +; DLQ-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP8]], i64 0 +; DLQ-NEXT: call void @__nsan_fcmp_fail_double_l(double [[TMP12]], double [[TMP13]], x86_fp80 [[TMP14]], x86_fp80 [[TMP15]], i32 1, i1 [[TMP16]], i1 [[TMP17]]) +; DLQ-NEXT: [[TMP18:%.*]] = extractelement <4 x double> [[A]], i64 1 +; DLQ-NEXT: [[TMP19:%.*]] = extractelement <4 x double> [[B]], i64 1 +; DLQ-NEXT: [[TMP20:%.*]] = extractelement <4 x x86_fp80> [[TMP4]], i64 1 +; DLQ-NEXT: [[TMP21:%.*]] = extractelement <4 x x86_fp80> [[TMP7]], i64 1 +; DLQ-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[R]], i64 1 +; DLQ-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP8]], i64 1 +; DLQ-NEXT: call void @__nsan_fcmp_fail_double_l(double [[TMP18]], double [[TMP19]], x86_fp80 [[TMP20]], x86_fp80 [[TMP21]], i32 1, i1 [[TMP22]], i1 [[TMP23]]) +; DLQ-NEXT: [[TMP24:%.*]] = extractelement <4 x double> [[A]], i64 2 +; DLQ-NEXT: [[TMP25:%.*]] = extractelement <4 x double> [[B]], i64 2 +; DLQ-NEXT: [[TMP26:%.*]] = extractelement <4 x x86_fp80> [[TMP4]], i64 2 +; DLQ-NEXT: [[TMP27:%.*]] = extractelement <4 x x86_fp80> [[TMP7]], i64 2 +; DLQ-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[R]], i64 2 +; DLQ-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP8]], i64 2 +; DLQ-NEXT: call void @__nsan_fcmp_fail_double_l(double [[TMP24]], double [[TMP25]], x86_fp80 [[TMP26]], x86_fp80 [[TMP27]], i32 1, i1 [[TMP28]], i1 [[TMP29]]) +; DLQ-NEXT: [[TMP30:%.*]] = extractelement <4 x double> [[A]], i64 3 +; DLQ-NEXT: [[TMP31:%.*]] = extractelement <4 x double> [[B]], i64 3 +; DLQ-NEXT: [[TMP32:%.*]] = extractelement <4 x x86_fp80> [[TMP4]], i64 
3
+; DLQ-NEXT: [[TMP33:%.*]] = extractelement <4 x x86_fp80> [[TMP7]], i64 3
+; DLQ-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[R]], i64 3
+; DLQ-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP8]], i64 3
+; DLQ-NEXT: call void @__nsan_fcmp_fail_double_l(double [[TMP30]], double [[TMP31]], x86_fp80 [[TMP32]], x86_fp80 [[TMP33]], i32 1, i1 [[TMP34]], i1 [[TMP35]])
+; DLQ-NEXT: br label [[TMP36]]
+; DLQ: 36:
+; DLQ-NEXT: ret <4 x i1> [[R]]
+;
+entry:
+ %r = fcmp oeq <4 x double> %a, %b
+ ret <4 x i1> %r
+}
+
+declare float @fabsf(float)
+
+; Scalar float comparison of an absolute difference: `fabs(a-b) == 0.25`.
+define float @sub_cmp_fabs(float %a, float %b) sanitize_numericalstability {
+; CHECK-LABEL: @sub_cmp_fabs(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], ptrtoint (float (float, float)* @sub_cmp_fabs to i64)
+; CHECK-NEXT: [[TMP2:%.*]] = load double, double* bitcast ([16384 x i8]* @__nsan_shadow_args_ptr to double*), align 1
+; CHECK-NEXT: [[TMP3:%.*]] = fpext float [[A:%.*]] to double
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP1]], double [[TMP2]], double [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = load double, double* bitcast (i8* getelementptr inbounds ([16384 x i8], [16384 x i8]* @__nsan_shadow_args_ptr, i64 0, i64 8) to double*), align 1
+; CHECK-NEXT: [[TMP6:%.*]] = fpext float [[B:%.*]] to double
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], double [[TMP5]], double [[TMP6]]
+; CHECK-NEXT: store i64 0, i64* @__nsan_shadow_args_tag, align 8
+; CHECK-NEXT: [[S:%.*]] = fsub float [[A]], [[B]]
+; CHECK-NEXT: [[TMP8:%.*]] = fsub double [[TMP4]], [[TMP7]]
+; CHECK-NEXT: [[R:%.*]] = call float @fabsf(float [[S]]) [[ATTR4:#.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fabs.f64(double [[TMP8]])
+; CHECK-NEXT: [[C:%.*]] = fcmp oeq float [[R]], 2.500000e-01
+; CHECK-NEXT: [[TMP10:%.*]] = fcmp oeq double [[TMP9]], 2.500000e-01
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i1 [[C]], [[TMP10]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[TMP13:%.*]], label [[TMP12:%.*]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__nsan_fcmp_fail_float_d(float [[R]], float 2.500000e-01, double [[TMP9]], double 2.500000e-01, i32 1, i1 [[C]], i1 [[TMP10]])
+; CHECK-NEXT: br label [[TMP13]]
+; CHECK: 13:
+; CHECK-NEXT: [[TMP14:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP9]], i32 1, i64 0)
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = fpext float [[R]] to double
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], double [[TMP16]], double [[TMP9]]
+; CHECK-NEXT: store i64 ptrtoint (float (float, float)* @sub_cmp_fabs to i64), i64* @__nsan_shadow_ret_tag, align 8
+; CHECK-NEXT: store double [[TMP17]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8
+; CHECK-NEXT: ret float [[R]]
+;
+entry:
+ %s = fsub float %a, %b
+ %r = call float @fabsf(float %s)
+ %c = fcmp oeq float %r, 0.25
+ ret float %r
+}
diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/invoke.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -nsan -nsan-shadow-type-mapping=dqq -S | FileCheck %s
+target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests for invoke instructions that require special handling of the phis. + +declare float @may_throw() + +declare void @personalityFn() + +define float @invoke1() sanitize_numericalstability personality void ()* @personalityFn { +; CHECK-LABEL: @invoke1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw() +; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (float ()* @may_throw to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load double, double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: br label [[CONTINUE:%.*]] +; CHECK: continue: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: land: +; CHECK-NEXT: [[RES:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[C]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP5]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: store i64 ptrtoint (float ()* @invoke1 to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP10]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; + +entry: + %c = invoke float @may_throw() to label %continue unwind label %land + +continue: + br label %exit + +land: + %res = landingpad { i8*, i32 } cleanup + %lv = uitofp i32 1 to float + br label %exit + +exit: + %r = phi float [ %lv, %land], [ %c, %continue ] + ret float %r +} + +define float @invoke2() sanitize_numericalstability personality void ()* @personalityFn { +; CHECK-LABEL: @invoke2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw() +; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (float ()* @may_throw to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load double, double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: br label [[CONTINUE:%.*]] +; CHECK: continue: +; CHECK-NEXT: [[CV:%.*]] = fadd float [[C]], 2.000000e+00 +; CHECK-NEXT: [[TMP6:%.*]] = fadd double [[TMP5]], 2.000000e+00 +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: land: +; CHECK-NEXT: [[RES:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[CV]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP7:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], 
[ [[TMP6]], [[CONTINUE]] ] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP7]], i32 1, i64 0) +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP9]], double [[TMP10]], double [[TMP7]] +; CHECK-NEXT: store i64 ptrtoint (float ()* @invoke2 to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP11]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; + +entry: + %c = invoke float @may_throw() to label %continue unwind label %land + +continue: + %cv = fadd float %c, 2.0 + br label %exit + +land: + %res = landingpad { i8*, i32 } cleanup + %lv = uitofp i32 1 to float + br label %exit + +exit: + %r = phi float [ %lv, %land], [ %cv, %continue ] + ret float %r +} + +define float @invoke3() sanitize_numericalstability personality void ()* @personalityFn { +; CHECK-LABEL: @invoke3( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C:%.*]] = invoke float @may_throw() +; CHECK-NEXT: to label [[TMP0:%.*]] unwind label [[LAND:%.*]] +; CHECK: land: +; CHECK-NEXT: [[RES:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: [[LV:%.*]] = uitofp i32 1 to float +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: 0: +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], ptrtoint (float ()* @may_throw to i64) +; CHECK-NEXT: [[TMP3:%.*]] = load double, double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fpext float [[C]] to double +; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], double [[TMP3]], double [[TMP4]] +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[R:%.*]] = phi float [ [[LV]], [[LAND]] ], [ [[C]], [[TMP0]] ] +; CHECK-NEXT: [[TMP6:%.*]] = phi double [ 1.000000e+00, [[LAND]] ], [ [[TMP5]], [[TMP0]] ] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @__nsan_internal_check_float_d(float [[R]], double [[TMP6]], i32 1, i64 0) +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 +; CHECK-NEXT: [[TMP9:%.*]] = fpext float [[R]] to double +; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP8]], double [[TMP9]], double [[TMP6]] +; CHECK-NEXT: store i64 ptrtoint (float ()* @invoke3 to i64), i64* @__nsan_shadow_ret_tag, align 8 +; CHECK-NEXT: store double [[TMP10]], double* bitcast ([128 x i8]* @__nsan_shadow_ret_ptr to double*), align 8 +; CHECK-NEXT: ret float [[R]] +; + +entry: + %c = invoke float @may_throw() to label %exit unwind label %land + +land: + %res = landingpad { i8*, i32 } cleanup + %lv = uitofp i32 1 to float + br label %exit + +exit: + %r = phi float [ %lv, %land], [ %c, %entry ] + ret float %r +} diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/memory.ll @@ -0,0 +1,476 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=x86_64-linux-gnu < %s -nsan -nsan-shadow-type-mapping=dqq -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +; Tests with memory manipulation (memcpy, llvm.memcpy, ...). 
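+; A hand-written sketch of how a float load resolves its shadow (illustrative
+; only, no CHECK lines; the declaration is normally inserted by the pass and
+; the function name is invented): the runtime returns a shadow pointer, and a
+; null result means shadow memory does not currently hold a value of that
+; float type, in which case the shadow is re-extended from the application
+; value. This is the branch-and-phi pattern checked throughout this file.
+declare i8* @__nsan_get_shadow_ptr_for_float_load(i8*, i64)
+
+define double @load_shadow_sketch(float* %p) {
+entry:
+  %v = load float, float* %p, align 4
+  %addr = bitcast float* %p to i8*
+  %sptr = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* %addr, i64 1)
+  %valid = icmp ne i8* %sptr, null
+  br i1 %valid, label %typed, label %untyped
+
+typed:
+  %sptr.d = bitcast i8* %sptr to double*
+  %s = load double, double* %sptr.d, align 1
+  br label %done
+
+untyped:
+  %ext = fpext float %v to double
+  br label %done
+
+done:
+  %shadow = phi double [ %s, %typed ], [ %ext, %untyped ]
+  ret double %shadow
+}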
+ + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) + +define void @call_memcpy_intrinsic(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b) sanitize_numericalstability { +; CHECK-LABEL: @call_memcpy_intrinsic( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @__nsan_copy_values(i8* [[A:%.*]], i8* [[B:%.*]], i64 16) +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(16) [[A]], i8* nonnull align 8 dereferenceable(16) [[B]], i64 16, i1 false) +; CHECK-NEXT: ret void +; +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b, i64 16, i1 false) + ret void +} + +declare dso_local i8* @memcpy(i8*, i8*, i64) local_unnamed_addr + +define void @call_memcpy(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b) sanitize_numericalstability { +; CHECK-LABEL: @call_memcpy( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = tail call i8* @memcpy(i8* nonnull align 8 dereferenceable(16) [[A:%.*]], i8* nonnull align 8 dereferenceable(16) [[B:%.*]], i64 16) #3 +; CHECK-NEXT: ret void +; +entry: + tail call i8* @memcpy(i8* nonnull align 8 dereferenceable(16) %a, i8* nonnull align 8 dereferenceable(16) %b, i64 16) + ret void +} + + +define void @transfer_float(float* %dst, float* %src) sanitize_numericalstability { +; CHECK-LABEL: @transfer_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = load float, float* [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP0]], i64 1) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8* [[TMP1]], null +; CHECK-NEXT: br i1 [[TMP2]], label [[TMP6:%.*]], label [[TMP3:%.*]] +; CHECK: 3: +; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP1]] to double* +; CHECK-NEXT: [[TMP5:%.*]] = load double, double* [[TMP4]], align 1 +; CHECK-NEXT: br label [[TMP8:%.*]] +; CHECK: 6: +; CHECK-NEXT: [[TMP7:%.*]] = fpext float [[T]] to double +; CHECK-NEXT: br label [[TMP8]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = phi double [ [[TMP5]], [[TMP3]] ], [ [[TMP7]], [[TMP6]] ] +; CHECK-NEXT: [[TMP10:%.*]] = bitcast float* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP11:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP10]], i64 1) +; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint float* [[DST]] to i64 +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @__nsan_internal_check_float_d(float [[T]], double [[TMP9]], i32 4, i64 [[TMP12]]) +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = fpext float [[T]] to double +; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP14]], double [[TMP15]], double [[TMP9]] +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP11]] to double* +; CHECK-NEXT: store double [[TMP16]], double* [[TMP17]], align 1 +; CHECK-NEXT: store float [[T]], float* [[DST]], align 1 +; CHECK-NEXT: ret void +; +entry: + %t = load float, float* %src + store float %t, float* %dst, align 1 + ret void +} + +define void @transfer_non_float(i32* %dst, i32* %src) sanitize_numericalstability { +; CHECK-LABEL: @transfer_non_float( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[T:%.*]] = load i32, i32* [[SRC:%.*]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 
1 +; CHECK-NEXT: [[TMP4:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP0]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i64* +; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 1 +; CHECK-NEXT: store i32 [[T]], i32* [[DST:%.*]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[DST]] to i8* +; CHECK-NEXT: [[TMP8:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +; CHECK-NEXT: store i32 [[TMP3]], i32* [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP7]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i64* +; CHECK-NEXT: store i64 [[TMP6]], i64* [[TMP11]], align 1 +; CHECK-NEXT: ret void +; +entry: + %t = load i32, i32* %src + store i32 %t, i32* %dst, align 1 + ret void +} + +define void @transfer_array([2 x float]* %a) sanitize_numericalstability { +; CHECK-LABEL: @transfer_array( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[B:%.*]] = load [2 x float], [2 x float]* [[A:%.*]], align 1 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x float]* [[A]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP0]]) +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i64* +; CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP0]]) +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to i128* +; CHECK-NEXT: [[TMP6:%.*]] = load i128, i128* [[TMP5]], align 1 +; CHECK-NEXT: store [2 x float] [[B]], [2 x float]* [[A]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast [2 x float]* [[A]] to i8* +; CHECK-NEXT: [[TMP8:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP7]]) +; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i64* +; CHECK-NEXT: store i64 [[TMP3]], i64* [[TMP9]], align 1 +; CHECK-NEXT: [[TMP10:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP7]]) +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[TMP10]] to i128* +; CHECK-NEXT: store i128 [[TMP6]], i128* [[TMP11]], align 1 +; CHECK-NEXT: ret void +; +entry: + %b = load [2 x float], [2 x float]* %a, align 1 + store [2 x float] %b, [2 x float]* %a, align 1 + ret void +} + +define void @swap_untyped1(i64* nonnull align 8 %p, i64* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_untyped1( +; CHECK-NEXT: [[QV:%.*]] = load i64, i64* [[Q:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[Q]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i64* +; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP1]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i128* +; CHECK-NEXT: [[TMP7:%.*]] = load i128, i128* [[TMP6]], align 1 +; CHECK-NEXT: [[PV:%.*]] = load i64, i64* [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[P]] to i8* +; CHECK-NEXT: [[TMP9:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64* +; CHECK-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP8]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i128* +; CHECK-NEXT: [[TMP14:%.*]] = load i128, i128* [[TMP13]], align 1 +; CHECK-NEXT: store i64 [[PV]], i64* 
[[Q]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64* [[Q]] to i8* +; CHECK-NEXT: [[TMP16:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i64* +; CHECK-NEXT: store i64 [[TMP11]], i64* [[TMP17]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP15]]) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to i128* +; CHECK-NEXT: store i128 [[TMP14]], i128* [[TMP19]], align 1 +; CHECK-NEXT: store i64 [[QV]], i64* [[P]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64* [[P]] to i8* +; CHECK-NEXT: [[TMP21:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i64* +; CHECK-NEXT: store i64 [[TMP4]], i64* [[TMP22]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP20]]) +; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to i128* +; CHECK-NEXT: store i128 [[TMP7]], i128* [[TMP24]], align 1 +; CHECK-NEXT: ret void +; + %qv = load i64, i64* %q + %pv = load i64, i64* %p + store i64 %pv, i64* %q, align 8 + store i64 %qv, i64* %p, align 8 + ret void +} + +; Same as swap_untyped1, but the load/stores are in the opposite order. +define void @swap_untyped2(i64* nonnull align 8 %p, i64* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_untyped2( +; CHECK-NEXT: [[PV:%.*]] = load i64, i64* [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP1]]) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to i64* +; CHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP1]]) +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to i128* +; CHECK-NEXT: [[TMP7:%.*]] = load i128, i128* [[TMP6]], align 1 +; CHECK-NEXT: [[QV:%.*]] = load i64, i64* [[Q:%.*]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[Q]] to i8* +; CHECK-NEXT: [[TMP9:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP8]]) +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i64* +; CHECK-NEXT: [[TMP11:%.*]] = load i64, i64* [[TMP10]], align 1 +; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP8]]) +; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8* [[TMP12]] to i128* +; CHECK-NEXT: [[TMP14:%.*]] = load i128, i128* [[TMP13]], align 1 +; CHECK-NEXT: store i64 [[PV]], i64* [[Q]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64* [[Q]] to i8* +; CHECK-NEXT: [[TMP16:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP15]]) +; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i64* +; CHECK-NEXT: store i64 [[TMP4]], i64* [[TMP17]], align 1 +; CHECK-NEXT: [[TMP18:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP15]]) +; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8* [[TMP18]] to i128* +; CHECK-NEXT: store i128 [[TMP7]], i128* [[TMP19]], align 1 +; CHECK-NEXT: store i64 [[QV]], i64* [[P]], align 8 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64* [[P]] to i8* +; CHECK-NEXT: [[TMP21:%.*]] = call i8* @__nsan_internal_get_raw_shadow_type_ptr(i8* [[TMP20]]) +; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8* [[TMP21]] to i64* +; CHECK-NEXT: store i64 [[TMP11]], i64* [[TMP22]], align 1 +; CHECK-NEXT: [[TMP23:%.*]] = call i8* @__nsan_internal_get_raw_shadow_ptr(i8* [[TMP20]]) +; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP23]] to i128* +; 
CHECK-NEXT: store i128 [[TMP14]], i128* [[TMP24]], align 1 +; CHECK-NEXT: ret void +; + %pv = load i64, i64* %p + %qv = load i64, i64* %q + store i64 %pv, i64* %q, align 8 + store i64 %qv, i64* %p, align 8 + ret void +} + +define void @swap_ft1(float* nonnull align 8 %p, float* nonnull align 8 %q) sanitize_numericalstability { +; CHECK-LABEL: @swap_ft1( +; CHECK-NEXT: [[QV:%.*]] = load float, float* [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[Q]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP1]], i64 1) +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8* [[TMP2]], null +; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP4:%.*]] +; CHECK: 4: +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to double* +; CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 1 +; CHECK-NEXT: br label [[TMP9:%.*]] +; CHECK: 7: +; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[QV]] to double +; CHECK-NEXT: br label [[TMP9]] +; CHECK: 9: +; CHECK-NEXT: [[TMP10:%.*]] = phi double [ [[TMP6]], [[TMP4]] ], [ [[TMP8]], [[TMP7]] ] +; CHECK-NEXT: [[PV:%.*]] = load float, float* [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[P]] to i8* +; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP11]], i64 1) +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i8* [[TMP12]], null +; CHECK-NEXT: br i1 [[TMP13]], label [[TMP17:%.*]], label [[TMP14:%.*]] +; CHECK: 14: +; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP12]] to double* +; CHECK-NEXT: [[TMP16:%.*]] = load double, double* [[TMP15]], align 1 +; CHECK-NEXT: br label [[TMP19:%.*]] +; CHECK: 17: +; CHECK-NEXT: [[TMP18:%.*]] = fpext float [[PV]] to double +; CHECK-NEXT: br label [[TMP19]] +; CHECK: 19: +; CHECK-NEXT: [[TMP20:%.*]] = phi double [ [[TMP16]], [[TMP14]] ], [ [[TMP18]], [[TMP17]] ] +; CHECK-NEXT: [[TMP21:%.*]] = bitcast float* [[Q]] to i8* +; CHECK-NEXT: [[TMP22:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP21]], i64 1) +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint float* [[Q]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = call i32 @__nsan_internal_check_float_d(float [[PV]], double [[TMP20]], i32 4, i64 [[TMP23]]) +; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP24]], 1 +; CHECK-NEXT: [[TMP26:%.*]] = fpext float [[PV]] to double +; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP25]], double [[TMP26]], double [[TMP20]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP22]] to double* +; CHECK-NEXT: store double [[TMP27]], double* [[TMP28]], align 1 +; CHECK-NEXT: store float [[PV]], float* [[Q]], align 8 +; CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[P]] to i8* +; CHECK-NEXT: [[TMP30:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP29]], i64 1) +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint float* [[P]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = call i32 @__nsan_internal_check_float_d(float [[QV]], double [[TMP10]], i32 4, i64 [[TMP31]]) +; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[TMP32]], 1 +; CHECK-NEXT: [[TMP34:%.*]] = fpext float [[QV]] to double +; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP33]], double [[TMP34]], double [[TMP10]] +; CHECK-NEXT: [[TMP36:%.*]] = bitcast i8* [[TMP30]] to double* +; CHECK-NEXT: store double [[TMP35]], double* [[TMP36]], align 1 +; CHECK-NEXT: store float [[QV]], float* [[P]], align 8 +; CHECK-NEXT: ret void +; + %qv = load float, float* %q + %pv = load float, float* %p + store float %pv, float* %q, align 8 + store float %qv, float* %p, align 8 + ret void +} + +; Same as swap_ft1, but the load/stores are in the 
opposite order.
+define void @swap_ft2(float* nonnull align 8 %p, float* nonnull align 8 %q) sanitize_numericalstability {
+; CHECK-LABEL: @swap_ft2(
+; CHECK-NEXT: [[PV:%.*]] = load float, float* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P]] to i8*
+; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP1]], i64 1)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8* [[TMP2]], null
+; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP4:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to double*
+; CHECK-NEXT: [[TMP6:%.*]] = load double, double* [[TMP5]], align 1
+; CHECK-NEXT: br label [[TMP9:%.*]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = fpext float [[PV]] to double
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = phi double [ [[TMP6]], [[TMP4]] ], [ [[TMP8]], [[TMP7]] ]
+; CHECK-NEXT: [[QV:%.*]] = load float, float* [[Q:%.*]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[Q]] to i8*
+; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP11]], i64 1)
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i8* [[TMP12]], null
+; CHECK-NEXT: br i1 [[TMP13]], label [[TMP17:%.*]], label [[TMP14:%.*]]
+; CHECK: 14:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP12]] to double*
+; CHECK-NEXT: [[TMP16:%.*]] = load double, double* [[TMP15]], align 1
+; CHECK-NEXT: br label [[TMP19:%.*]]
+; CHECK: 17:
+; CHECK-NEXT: [[TMP18:%.*]] = fpext float [[QV]] to double
+; CHECK-NEXT: br label [[TMP19]]
+; CHECK: 19:
+; CHECK-NEXT: [[TMP20:%.*]] = phi double [ [[TMP16]], [[TMP14]] ], [ [[TMP18]], [[TMP17]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast float* [[Q]] to i8*
+; CHECK-NEXT: [[TMP22:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP21]], i64 1)
+; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint float* [[Q]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = call i32 @__nsan_internal_check_float_d(float [[PV]], double [[TMP10]], i32 4, i64 [[TMP23]])
+; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP24]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = fpext float [[PV]] to double
+; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP25]], double [[TMP26]], double [[TMP10]]
+; CHECK-NEXT: [[TMP28:%.*]] = bitcast i8* [[TMP22]] to double*
+; CHECK-NEXT: store double [[TMP27]], double* [[TMP28]], align 1
+; CHECK-NEXT: store float [[PV]], float* [[Q]], align 8
+; CHECK-NEXT: [[TMP29:%.*]] = bitcast float* [[P]] to i8*
+; CHECK-NEXT: [[TMP30:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP29]], i64 1)
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint float* [[P]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = call i32 @__nsan_internal_check_float_d(float [[QV]], double [[TMP20]], i32 4, i64 [[TMP31]])
+; CHECK-NEXT: [[TMP33:%.*]] = icmp eq i32 [[TMP32]], 1
+; CHECK-NEXT: [[TMP34:%.*]] = fpext float [[QV]] to double
+; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP33]], double [[TMP34]], double [[TMP20]]
+; CHECK-NEXT: [[TMP36:%.*]] = bitcast i8* [[TMP30]] to double*
+; CHECK-NEXT: store double [[TMP35]], double* [[TMP36]], align 1
+; CHECK-NEXT: store float [[QV]], float* [[P]], align 8
+; CHECK-NEXT: ret void
+;
+  %pv = load float, float* %p
+  %qv = load float, float* %q
+  store float %pv, float* %q, align 8
+  store float %qv, float* %p, align 8
+  ret void
+}
+
+define void @swap_vectorft1(<2 x float>* nonnull align 16 %p, <2 x float>* nonnull align 16 %q) sanitize_numericalstability {
+; CHECK-LABEL: @swap_vectorft1(
+; CHECK-NEXT: [[QV:%.*]] = load <2 x float>, <2 x float>* [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[Q]] to i8*
+; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP1]], i64 2)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8* [[TMP2]], null
+; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP4:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to <2 x double>*
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 1
+; CHECK-NEXT: br label [[TMP9:%.*]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x double> [ [[TMP6]], [[TMP4]] ], [ [[TMP8]], [[TMP7]] ]
+; CHECK-NEXT: [[PV:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x float>* [[P]] to i8*
+; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP11]], i64 2)
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i8* [[TMP12]], null
+; CHECK-NEXT: br i1 [[TMP13]], label [[TMP17:%.*]], label [[TMP14:%.*]]
+; CHECK: 14:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP12]] to <2 x double>*
+; CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[TMP15]], align 1
+; CHECK-NEXT: br label [[TMP19:%.*]]
+; CHECK: 17:
+; CHECK-NEXT: [[TMP18:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP19]]
+; CHECK: 19:
+; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x double> [ [[TMP16]], [[TMP14]] ], [ [[TMP18]], [[TMP17]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x float>* [[Q]] to i8*
+; CHECK-NEXT: [[TMP22:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP21]], i64 2)
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[PV]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[TMP20]], i64 0
+; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint <2 x float>* [[Q]] to i64
+; CHECK-NEXT: [[TMP26:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP23]], double [[TMP24]], i32 4, i64 [[TMP25]])
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[PV]], i64 1
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[TMP20]], i64 1
+; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint <2 x float>* [[Q]] to i64
+; CHECK-NEXT: [[TMP30:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP27]], double [[TMP28]], i32 4, i64 [[TMP29]])
+; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP26]], [[TMP30]]
+; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 1
+; CHECK-NEXT: [[TMP33:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP32]], <2 x double> [[TMP33]], <2 x double> [[TMP20]]
+; CHECK-NEXT: [[TMP35:%.*]] = bitcast i8* [[TMP22]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[TMP35]], align 1
+; CHECK-NEXT: store <2 x float> [[PV]], <2 x float>* [[Q]], align 16
+; CHECK-NEXT: [[TMP36:%.*]] = bitcast <2 x float>* [[P]] to i8*
+; CHECK-NEXT: [[TMP37:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP36]], i64 2)
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[QV]], i64 0
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x double> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint <2 x float>* [[P]] to i64
+; CHECK-NEXT: [[TMP41:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP38]], double [[TMP39]], i32 4, i64 [[TMP40]])
+; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[QV]], i64 1
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x double> [[TMP10]], i64 1
+; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint <2 x float>* [[P]] to i64
+; CHECK-NEXT: [[TMP45:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP42]], double [[TMP43]], i32 4, i64 [[TMP44]])
+; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP41]], [[TMP45]]
+; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i32 [[TMP46]], 1
+; CHECK-NEXT: [[TMP48:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], <2 x double> [[TMP48]], <2 x double> [[TMP10]]
+; CHECK-NEXT: [[TMP50:%.*]] = bitcast i8* [[TMP37]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP49]], <2 x double>* [[TMP50]], align 1
+; CHECK-NEXT: store <2 x float> [[QV]], <2 x float>* [[P]], align 16
+; CHECK-NEXT: ret void
+;
+  %qv = load <2 x float>, <2 x float>* %q
+  %pv = load <2 x float>, <2 x float>* %p
+  store <2 x float> %pv, <2 x float>* %q, align 16
+  store <2 x float> %qv, <2 x float>* %p, align 16
+  ret void
+}
+
+; Same as swap_vectorft1, but the load/stores are in the opposite order.
+define void @swap_vectorft2(<2 x float>* nonnull align 16 %p, <2 x float>* nonnull align 16 %q) sanitize_numericalstability {
+; CHECK-LABEL: @swap_vectorft2(
+; CHECK-NEXT: [[PV:%.*]] = load <2 x float>, <2 x float>* [[P:%.*]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float>* [[P]] to i8*
+; CHECK-NEXT: [[TMP2:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP1]], i64 2)
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i8* [[TMP2]], null
+; CHECK-NEXT: br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP4:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to <2 x double>*
+; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* [[TMP5]], align 1
+; CHECK-NEXT: br label [[TMP9:%.*]]
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP9]]
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x double> [ [[TMP6]], [[TMP4]] ], [ [[TMP8]], [[TMP7]] ]
+; CHECK-NEXT: [[QV:%.*]] = load <2 x float>, <2 x float>* [[Q:%.*]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x float>* [[Q]] to i8*
+; CHECK-NEXT: [[TMP12:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* [[TMP11]], i64 2)
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i8* [[TMP12]], null
+; CHECK-NEXT: br i1 [[TMP13]], label [[TMP17:%.*]], label [[TMP14:%.*]]
+; CHECK: 14:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP12]] to <2 x double>*
+; CHECK-NEXT: [[TMP16:%.*]] = load <2 x double>, <2 x double>* [[TMP15]], align 1
+; CHECK-NEXT: br label [[TMP19:%.*]]
+; CHECK: 17:
+; CHECK-NEXT: [[TMP18:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: br label [[TMP19]]
+; CHECK: 19:
+; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x double> [ [[TMP16]], [[TMP14]] ], [ [[TMP18]], [[TMP17]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x float>* [[Q]] to i8*
+; CHECK-NEXT: [[TMP22:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP21]], i64 2)
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[PV]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP25:%.*]] = ptrtoint <2 x float>* [[Q]] to i64
+; CHECK-NEXT: [[TMP26:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP23]], double [[TMP24]], i32 4, i64 [[TMP25]])
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x float> [[PV]], i64 1
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[TMP10]], i64 1
+; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint <2 x float>* [[Q]] to i64
+; CHECK-NEXT: [[TMP30:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP27]], double [[TMP28]], i32 4, i64 [[TMP29]])
+; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP26]], [[TMP30]]
+; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[TMP31]], 1
+; CHECK-NEXT: [[TMP33:%.*]] = fpext <2 x float> [[PV]] to <2 x double>
+; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP32]], <2 x double> [[TMP33]], <2 x double> [[TMP20]]
+; CHECK-NEXT: [[TMP35:%.*]] = bitcast i8* [[TMP22]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP34]], <2 x double>* [[TMP35]], align 1
+; CHECK-NEXT: store <2 x float> [[PV]], <2 x float>* [[Q]], align 16
+; CHECK-NEXT: [[TMP36:%.*]] = bitcast <2 x float>* [[P]] to i8*
+; CHECK-NEXT: [[TMP37:%.*]] = call i8* @__nsan_get_shadow_ptr_for_float_store(i8* [[TMP36]], i64 2)
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x float> [[QV]], i64 0
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <2 x double> [[TMP20]], i64 0
+; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint <2 x float>* [[P]] to i64
+; CHECK-NEXT: [[TMP41:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP38]], double [[TMP39]], i32 4, i64 [[TMP40]])
+; CHECK-NEXT: [[TMP42:%.*]] = extractelement <2 x float> [[QV]], i64 1
+; CHECK-NEXT: [[TMP43:%.*]] = extractelement <2 x double> [[TMP20]], i64 1
+; CHECK-NEXT: [[TMP44:%.*]] = ptrtoint <2 x float>* [[P]] to i64
+; CHECK-NEXT: [[TMP45:%.*]] = call i32 @__nsan_internal_check_float_d(float [[TMP42]], double [[TMP43]], i32 4, i64 [[TMP44]])
+; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP41]], [[TMP45]]
+; CHECK-NEXT: [[TMP47:%.*]] = icmp eq i32 [[TMP46]], 1
+; CHECK-NEXT: [[TMP48:%.*]] = fpext <2 x float> [[QV]] to <2 x double>
+; CHECK-NEXT: [[TMP49:%.*]] = select i1 [[TMP47]], <2 x double> [[TMP48]], <2 x double> [[TMP20]]
+; CHECK-NEXT: [[TMP50:%.*]] = bitcast i8* [[TMP37]] to <2 x double>*
+; CHECK-NEXT: store <2 x double> [[TMP49]], <2 x double>* [[TMP50]], align 1
+; CHECK-NEXT: store <2 x float> [[QV]], <2 x float>* [[P]], align 16
+; CHECK-NEXT: ret void
+;
+  %pv = load <2 x float>, <2 x float>* %p
+  %qv = load <2 x float>, <2 x float>* %q
+  store <2 x float> %pv, <2 x float>* %q, align 16
+  store <2 x float> %qv, <2 x float>* %p, align 16
+  ret void
+}
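+
+; Informal sketch of the pattern exercised above (comments only, not checked
+; by FileCheck; names are illustrative). For a scalar load
+; `%v = load float, float* %p`, the pass materializes the shadow value as:
+;   %s = call i8* @__nsan_get_shadow_ptr_for_float_load(i8* %p.i8, i64 1)
+;   ; if %s is null:  %shadow = fpext float %v to double
+;   ; otherwise:      %shadow = load double through %s
+; and for `store float %v, float* %q` it verifies the shadow before writing
+; both values back:
+;   %w = call i32 @__nsan_internal_check_float_d(float %v, double %shadow, i32 4, i64 %q.addr)
+;   ; if %w == 1, the shadow is resumed from `fpext float %v to double`
+;   ; before being stored through @__nsan_get_shadow_ptr_for_float_store.
+; For vectors, each element is checked separately and the i32 results are
+; or'ed together before the resume decision, as in the checks above.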