diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -396,6 +396,7 @@ libc.src.stdio.stderr libc.src.stdio.stdin libc.src.stdio.stdout + libc.src.stdio.ungetc # stdlib.h entrypoints libc.src.stdlib._Exit diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -641,6 +641,11 @@ ArgSpec, ArgSpec] >, + FunctionSpec< + "ungetc", + RetValSpec, + [ArgSpec, ArgSpec] + >, ], [ ObjectSpec< diff --git a/libc/src/__support/File/file.h b/libc/src/__support/File/file.h --- a/libc/src/__support/File/file.h +++ b/libc/src/__support/File/file.h @@ -187,6 +187,14 @@ int flush_unlocked(); + // Returns |c| on success; returns EOF on error, leaving the file unchanged. + int ungetc_unlocked(int c); + + int ungetc(int c) { + FileLock lock(this); + return ungetc_unlocked(c); + } + // Sets the internal buffer to |buffer| with buffering mode |mode|. // |size| is the size of |buffer|. This new |buffer| is owned by the // stream only if |owned| is true. diff --git a/libc/src/__support/File/file.cpp b/libc/src/__support/File/file.cpp --- a/libc/src/__support/File/file.cpp +++ b/libc/src/__support/File/file.cpp @@ -203,10 +203,14 @@ for (size_t i = 0; i < available_data; ++i) dataref[i] = bufref[i + pos]; read_limit = pos = 0; // Reset the pointers. + // Update the dataref to reflect the fact that we have already + // copied |available_data| into |data|. 
+ dataref = cpp::span(dataref.data() + available_data, + dataref.size() - available_data); size_t to_fetch = len - available_data; if (to_fetch > bufsize) { - size_t fetched_size = platform_read(this, data, to_fetch); + size_t fetched_size = platform_read(this, dataref.data(), to_fetch); if (fetched_size < to_fetch) { if (errno == 0) eof = true; @@ -233,6 +237,44 @@ return transfer_size + available_data; } +int File::ungetc_unlocked(int c) { + // There is no meaning to unget if: + // 1. You are trying to push back EOF. + // 2. Read operations are not allowed on this file. + // 3. The previous operation was a write operation. + if (c == EOF || !read_allowed() || (prev_op == FileOp::WRITE)) + return EOF; + + cpp::span bufref(static_cast(buf), bufsize); + if (read_limit == 0) { + // If |read_limit| is zero, it can mean three things: + // a. This file was just created. + // b. The previous operation was a seek operation. + // c. The previous operation was a read operation which emptied + // the buffer. + // For all the above cases, we simply write |c| at the beginning + // of the buffer and bump |read_limit|. Note that |pos| will also + // be zero in this case, so we don't need to adjust it. + bufref[0] = static_cast(c); + ++read_limit; + } else { + // If |read_limit| is non-zero, it means that there is data in the buffer + // from a previous read operation. This normally means that |pos| is not + // zero. So, we decrement |pos| and write |c| into the buffer at the new + // |pos|. If too many ungetc operations are performed without reads, it + // can lead to (pos == 0 but read_limit != 0). We will just error out in + // such a case. + if (pos == 0) + return EOF; + --pos; + bufref[pos] = static_cast(c); + } + + eof = false; // There is at least one character that can be read now. + err = false; // This operation was a success. 
+ return c; +} + int File::seek(long offset, int whence) { FileLock lock(this); if (prev_op == FileOp::WRITE && pos > 0) { diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -282,6 +282,18 @@ libc.src.__support.File.platform_file ) +add_entrypoint_object( + ungetc + SRCS + ungetc.cpp + HDRS + ungetc.h + DEPENDS + libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + add_entrypoint_object( fopencookie SRCS diff --git a/libc/src/stdio/ungetc.h b/libc/src/stdio/ungetc.h new file mode 100644 --- /dev/null +++ b/libc/src/stdio/ungetc.h @@ -0,0 +1,20 @@ +//===-- Implementation header of ungetc -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_UNGETC_H +#define LLVM_LIBC_SRC_STDIO_UNGETC_H + +#include + +namespace __llvm_libc { + +int ungetc(int c, ::FILE *stream); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_STDIO_UNGETC_H diff --git a/libc/src/stdio/ungetc.cpp b/libc/src/stdio/ungetc.cpp new file mode 100644 --- /dev/null +++ b/libc/src/stdio/ungetc.cpp @@ -0,0 +1,20 @@ +//===-- Implementation of ungetc ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ungetc.h" +#include "src/__support/File/file.h" + +#include + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, ungetc, (int c, ::FILE *stream)) { + return reinterpret_cast<__llvm_libc::File *>(stream)->ungetc(c); +} + +} // namespace __llvm_libc diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -21,6 +21,22 @@ libc.src.stdio.fwrite ) +add_libc_unittest( + ungetc_test + SUITE + libc_stdio_unittests + SRCS + ungetc_test.cpp + DEPENDS + libc.include.stdio + libc.src.stdio.fclose + libc.src.stdio.fopen + libc.src.stdio.fread + libc.src.stdio.fseek + libc.src.stdio.fwrite + libc.src.stdio.ungetc +) + add_libc_unittest( unlocked_fileop_test SUITE diff --git a/libc/test/src/stdio/ungetc_test.cpp b/libc/test/src/stdio/ungetc_test.cpp new file mode 100644 --- /dev/null +++ b/libc/test/src/stdio/ungetc_test.cpp @@ -0,0 +1,59 @@ +//===-- Unittests for ungetc ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fclose.h" +#include "src/stdio/fopen.h" +#include "src/stdio/fread.h" +#include "src/stdio/fseek.h" +#include "src/stdio/fwrite.h" +#include "src/stdio/ungetc.h" +#include "utils/UnitTest/Test.h" + +#include + +TEST(LlvmLibcUngetcTest, UngetAndReadBack) { + constexpr char FILENAME[] = "testdata/ungetc_test.test"; + ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ASSERT_FALSE(file == nullptr); + constexpr char CONTENT[] = "abcdef"; + constexpr size_t CONTENT_SIZE = sizeof(CONTENT); + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fwrite(CONTENT, 1, CONTENT_SIZE, file)); + // Cannot unget to a file that is not open for reading. + ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file)); + ASSERT_EQ(0, __llvm_libc::fclose(file)); + + file = __llvm_libc::fopen(FILENAME, "r+"); + ASSERT_FALSE(file == nullptr); + char c; + ASSERT_EQ(__llvm_libc::fread(&c, 1, 1, file), size_t(1)); + ASSERT_EQ(c, CONTENT[0]); + ASSERT_EQ(__llvm_libc::ungetc(int(c), file), int(c)); + + char data[CONTENT_SIZE]; + ASSERT_EQ(CONTENT_SIZE, __llvm_libc::fread(data, 1, CONTENT_SIZE, file)); + ASSERT_STREQ(CONTENT, data); + + ASSERT_EQ(0, __llvm_libc::fseek(file, 0, SEEK_SET)); + // ungetc should not fail after a seek operation. + int unget_char = 'z'; + ASSERT_EQ(unget_char, __llvm_libc::ungetc(unget_char, file)); + // A second unget without an intervening read should fail. + ASSERT_EQ(EOF, __llvm_libc::ungetc(unget_char, file)); + // Ungetting a char at the beginning of the file will allow us to fetch + // one additional character. + char new_data[CONTENT_SIZE + 1]; + ASSERT_EQ(CONTENT_SIZE + 1, + __llvm_libc::fread(new_data, 1, CONTENT_SIZE + 1, file)); + ASSERT_STREQ("zabcdef", new_data); + + ASSERT_EQ(size_t(1), __llvm_libc::fwrite("x", 1, 1, file)); + // unget should fail after a write operation. 
+ ASSERT_EQ(EOF, __llvm_libc::ungetc('1', file)); + + ASSERT_EQ(0, __llvm_libc::fclose(file)); +}