diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp --- a/llvm/lib/BinaryFormat/Magic.cpp +++ b/llvm/lib/BinaryFormat/Magic.cpp @@ -55,8 +55,16 @@ memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0) return file_magic::windows_resource; // 0x0000 = COFF unknown machine type - if (Magic[1] == 0) - return file_magic::coff_object; + if (Magic[1] == 0) { + // We don't want to mis-identify files that simply start with 2 leading + // null bytes as a COFF object. One such example is the rust.metadata.bin + // file which the rust toolchain embeds in ar archives. This file starts + // with 4 null bytes. To work around this, also check the following two + // bytes which represent the number of COFF sections and should be + // non-zero. + if (Magic[2] != 0 || Magic[3] != 0) + return file_magic::coff_object; + } if (startswith(Magic, "\0asm")) return file_magic::wasm_object; break; diff --git a/llvm/unittests/BinaryFormat/TestFileMagic.cpp b/llvm/unittests/BinaryFormat/TestFileMagic.cpp --- a/llvm/unittests/BinaryFormat/TestFileMagic.cpp +++ b/llvm/unittests/BinaryFormat/TestFileMagic.cpp @@ -84,6 +84,7 @@ "DS\x00\x00\x00"; const char tapi_file[] = "--- !tapi-tbd-v1\n"; const char tapi_file_tbd_v1[] = "---\narchs: ["; +const char all_zeros[] = "\x00\x00\x00\x00"; TEST_F(MagicTest, Magic) { struct type { @@ -119,6 +120,8 @@ DEFINE(tapi_file), {"tapi_file_tbd_v1", tapi_file_tbd_v1, sizeof(tapi_file_tbd_v1), file_magic::tapi_file}, + // A file containing all zeros should not be recognized as a valid object + {"all_zeros", all_zeros, sizeof(all_zeros), file_magic::unknown}, #undef DEFINE };