Skip to content

zvfs: improve libc FILE to integer fd abstraction #83386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/libc/newlib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: Apache-2.0

zephyr_library()
zephyr_library_sources(libc-hooks.c)
zephyr_library_sources(
libc-hooks.c
z_libc.c
)

# Do not allow LTO when compiling libc-hooks.c file
set_source_files_properties(libc-hooks.c PROPERTIES COMPILE_OPTIONS $<TARGET_PROPERTY:compiler,prohibit_lto>)
Expand Down
2 changes: 1 addition & 1 deletion lib/libc/newlib/libc-hooks.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ int _open(const char *name, int flags, ...)
{
return -1;
}
__weak FUNC_ALIAS(_open, open, int);
__weak int open(const char *name, int mode, ...) ALIAS_OF(_open);

int _close(int file)
{
Expand Down
81 changes: 81 additions & 0 deletions lib/libc/newlib/z_libc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright (c) 2024 Tenstorrent AI ULC
*
* SPDX-License-Identifier: Apache-2.0
*/

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#include <zephyr/sys/fdtable.h>

static int z_libc_sflags(const char *mode)
{
int ret = 0;

switch (mode[0]) {
case 'r':
ret = ZVFS_O_RDONLY;
break;

case 'w':
ret = ZVFS_O_WRONLY | ZVFS_O_CREAT | ZVFS_O_TRUNC;
break;

case 'a':
ret = ZVFS_O_WRONLY | ZVFS_O_CREAT | ZVFS_O_APPEND;
break;
default:
return 0;
}

while (*++mode) {
switch (*mode) {
case '+':
ret |= ZVFS_O_RDWR;
break;
case 'x':
ret |= ZVFS_O_EXCL;
break;
default:
break;
}
}

return ret;
}

FILE *z_libc_file_alloc(int fd, const char *mode)
{
FILE *fp;
/*
* These symbols have conflicting declarations in upstream headers.
* Use uintptr_t and a cast to assign cleanly.
*/
extern uintptr_t _close_r;
extern uintptr_t _lseek_r;
extern uintptr_t _read_r;
extern uintptr_t _write_r;

fp = calloc(1, sizeof(*fp));
if (fp == NULL) {
return NULL;
}

fp->_flags = z_libc_sflags(mode);
fp->_file = fd;
fp->_cookie = (void *)fp;

*(uintptr_t *)fp->_read = _read_r;
*(uintptr_t *)fp->_write = _write_r;
*(uintptr_t *)fp->_seek = _lseek_r;
*(uintptr_t *)fp->_close = _close_r;

return fp;
}

int z_libc_file_get_fd(FILE *fp)
{
return fp->_file;
}
1 change: 1 addition & 0 deletions lib/libc/picolibc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ zephyr_library_sources(
exit.c
locks.c
stdio.c
z_libc.c
)

zephyr_library_compile_options($<TARGET_PROPERTY:compiler,prohibit_lto>)
Expand Down
60 changes: 60 additions & 0 deletions lib/libc/picolibc/z_libc.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Copyright (c) 2024 Tenstorrent AI ULC
*
* SPDX-License-Identifier: Apache-2.0
*/

#include "stdio-bufio.h"

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#include <zephyr/sys/fdtable.h>

#define FDEV_SETUP_ZVFS(fd, buf, size, rwflags, bflags) \
FDEV_SETUP_BUFIO(fd, buf, size, zvfs_read_wrap, zvfs_write_wrap, zvfs_lseek, zvfs_close, \
rwflags, bflags)

int __posix_sflags(const char *mode, int *optr);

/* FIXME: do not use ssize_t or off_t */
ssize_t zvfs_read(int fd, void *buf, size_t sz, const size_t *from_offset);
ssize_t zvfs_write(int fd, const void *buf, size_t sz, const size_t *from_offset);
off_t zvfs_lseek(int fd, off_t offset, int whence);
int zvfs_close(int fd);

static ssize_t zvfs_read_wrap(int fd, void *buf, size_t sz)
{
return zvfs_read(fd, buf, sz, NULL);
}

static ssize_t zvfs_write_wrap(int fd, const void *buf, size_t sz)
{
return zvfs_write(fd, buf, sz, NULL);
}

FILE *z_libc_file_alloc(int fd, const char *mode)
{
struct __file_bufio *bf;
int __maybe_unused unused;

bf = calloc(1, sizeof(struct __file_bufio) + BUFSIZ);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was some comments about not using dynamic memory alloc. I just wonder if it could be possible to allow static allocation here, for example have Kconfig option to allow user either use dynamic alloc or to have an static array of these structs. This would allow the user to decide what suits his/hers usage better. Using malloc allows always a possibility to memory leaks.

if (bf == NULL) {
return NULL;
}

*bf = (struct __file_bufio)FDEV_SETUP_ZVFS(fd, (char *)(bf + 1), BUFSIZ,
__posix_sflags(mode, &unused), __BFALL);

__bufio_lock_init(&(bf->xfile.cfile.file));

return &(bf->xfile.cfile.file);
}

int z_libc_file_get_fd(const FILE *fp)
{
const struct __file_bufio *const bf = (const struct __file_bufio *)fp;

return POINTER_TO_INT(bf->ptr);
}
34 changes: 30 additions & 4 deletions lib/os/fdtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@
#include <zephyr/internal/syscall_handler.h>
#include <zephyr/sys/atomic.h>

#ifndef CONFIG_MINIMAL_LIBC
extern FILE *z_libc_file_alloc(int fd, const char *mode);
Copy link
Member Author

@cfriedt cfriedt Dec 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is a better name here z_libc_fd_get_file() to highlight that it's the opposite of z_libc_file_get_fd()?

Naming things is hard, and it's really just an artifact of picolibc and newlib that they use calloc and malloc internally.

In theory, it should be easy enough to create a statically allocated table of FILE objects and have them use that instead of dynamic allocation.

Copy link
Collaborator

@keith-packard keith-packard Dec 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was looking through my local repo and found an implementation of fopen for Zephyr which doesn't use the underlying POSIX APIs at all. With your re-implementation of z_libc_file_get_file (nee fdopen), I wonder if we shouldn't provide both of these within Zephyr rather than within picolibc; we could then use a static array of FILE objects for both and you'd be able to easily translate between them.

The picolibc stdio implementation provides stable API which can support this, so it's not even a hack :-)

Check out https://github.com/keith-packard/zephyr/blob/picolibc-fopen/lib/libc/picolibc/fopen.c

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was looking through my local repo and found an implementation of fopen for Zephyr which doesn't use the underlying POSIX APIs at all.

That sounds promising.

With your re-implementation of z_libc_fd_get_file (nee fdopen), I wonder if we shouldn't provide both of these within Zephyr rather than within picolibc

That's sort of where I was heading as well. It's simple enough to implement inside of Zephyr and doesn't require any libc modifications.

It's also reasonably easy for other C libraries to do something similar, even those without POSIX support, like IAR.

The main difference between fdopen() and z_libc_fd_get_file() is that they have slightly different use-cases; the former is for calling from user code into the OS and is dependent on the POSIX API, whereas the latter is for calling into the libc from the OS, and doesn't depend on the POSIX API. So they're kind of going in opposite directions. The latter also eliminates possibly recursive calls.

we could then use a static array of FILE objects for both and you'd be able to easily translate between them.

That sounds great too - some Zephyr users prefer to avoid malloc-related things entirely. So that would be a bit more "user friendly" (for certain kinds of users).

The picolibc stdio implementation provides stable API which can support this, so it's not even a hack :-)

👍

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@keith-packard - do you want to make a separate PR, or would you prefer if I added e.g. static FILE table here?

I'm happy to rename things as needed as well.

Some might prefer e.g. a __-prefix

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main difference between fdopen() and z_libc_fd_get_file() is that they have slightly different use-cases; the former is for calling from user code into the OS and is dependent on the POSIX API, whereas the latter is for calling into the libc from the OS, and doesn't depend on the POSIX API. So they're kind of going in opposite directions. The latter also eliminates possibly recursive calls.

There's nothing POSIX-specific about fdopen. On newlib, it doesn't use POSIX apis, it uses the existing
wrappers just like fopen does. With picolibc, it would be implemented entirely in Zephyr using non-POSIX Zephyr APIs. Using the POSIX name makes it far easier for developers to use, as long as the semantics are effectively the same. Creating a new name just makes this function harder to find.

That sounds great too - some Zephyr users prefer to avoid malloc-related things entirely. So that would be a bit more "user friendly" (for certain kinds of users).

Yup, I avoid using malloc in most of my embedded code too -- I find it far easier to reason about memory usage when it's all pre-allocated. However, in this case it sure looks like you're replacing one general allocator (malloc) with a special-purpose allocator (static array of FILE structs). I think the argument for doing that isn't very strong; you still need to evaluate the whole application to ensure there won't be any resource starvation.

I prefer to use APIs which don't rely on any dynamic allocation. Picolibc does this by allowing applications to declare FILE structs themselves, rather than calling a function to allocate them. The only problem with this approach is that stdio doesn't generally know about all of the in-use FILE structs, so fflush(NULL) doesn't work. We'd need some helpers to make that easy to use with Zephyr.

That's secondary to how fopen (and "fdopen") should be implemented in Zephyr; I think what we probably want is to implement "fdopen" (whatever the name ends up being) using the fdtable APIs and then implement fopen on top of that, just as picolibc does when using POSIX apis.

Copy link
Member Author

@cfriedt cfriedt Dec 31, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's nothing POSIX-specific about fdopen.

Other than the fact that fdopen is entirely POSIX specific (it's not a part of ISO C).

With picolibc, it would be implemented entirely in Zephyr using non-POSIX Zephyr APIs.

That will be ideal, because then ISO C and POSIX would use the same underlying APIs.

Using the POSIX name makes it far easier for developers to use, as long as the semantics are effectively the same. Creating a new name just makes this function harder to find.

Uhm... I think you're a bit off there.

The point is that there is no dependency cycle. The OS shouldn't refer to a FILE <-> fd mapping managed by the C library if it uses the same name that application code uses. This is because that would be crossing the user-kernel API line twice, adding an API dependency cycle (which is a slippery slope). It's much better to have no cycles in the DAG of API calls.

By having a separate name for the kernel and for users, we mitigate the dependency cycle with a mutual dependency.

It's pretty simple. We will still offer both the POSIX API call and the native Zephyr call.

However, in this case it sure looks like you're replacing one general allocator (malloc) with a special-purpose allocator (static array of FILE structs).

A pool of objects is kind of like a special-purpose allocator, yes, but it's generally considered safer for certain applications than using heap allocation.

The main point is enabling standards-compliant code.

I think the argument for doing that isn't very strong; you still need to evaluate the whole application to ensure there won't be any resource starvation.

Technically, we could do declarative allocation as long as we can put FILE objects into the same linker section. Then it's effectively using both declarative allocation as well as pool-based allocation.

I'd like to eventually convert "files" to be proper k_objects soon. Likely that would require some linkage between the libc FILE and an new kernel object.

I prefer to use APIs which don't rely on any dynamic allocation.

That makes sense for most high-reliability / safety critical applications.

That's secondary to how fopen (and "fdopen") should be implemented in Zephyr

Yes.

extern int z_libc_file_get_fd(const FILE *fp);
#endif

struct stat;

struct fd_entry {
Expand Down Expand Up @@ -75,6 +80,20 @@ static struct fd_entry fdtable[CONFIG_ZVFS_OPEN_MAX] = {

static K_MUTEX_DEFINE(fdtable_lock);

#ifdef CONFIG_MINIMAL_LIBC
static ALWAYS_INLINE inline FILE *z_libc_file_alloc(int fd, const char *mode)
{
ARG_UNUSED(mode);

return (FILE *)&fdtable[fd];
}

static ALWAYS_INLINE inline int z_libc_file_get_fd(const FILE *fp)
{
return (const struct fd_entry *)fp - fdtable;
}
#endif

static int z_fd_ref(int fd)
{
return atomic_inc(&fdtable[fd].refcount) + 1;
Expand Down Expand Up @@ -413,23 +432,30 @@ int zvfs_close(int fd)

FILE *zvfs_fdopen(int fd, const char *mode)
{
ARG_UNUSED(mode);
FILE *ret;

if (_check_fd(fd) < 0) {
return NULL;
}

return (FILE *)&fdtable[fd];
ret = z_libc_file_alloc(fd, mode);
if (ret == NULL) {
errno = ENOMEM;
}

return ret;
}

int zvfs_fileno(FILE *file)
{
if (!IS_ARRAY_ELEMENT(fdtable, file)) {
int fd = z_libc_file_get_fd(file);

if (fd < 0 || fd >= ARRAY_SIZE(fdtable)) {
errno = EBADF;
return -1;
}

return (struct fd_entry *)file - fdtable;
return fd;
}

int zvfs_fstat(int fd, struct stat *buf)
Expand Down
Loading