-
Couldn't load subscription status.
- Fork 42
Uhyve file read/write should not assume target buffer is contiguous in physical memory #86
Description
Hello,
With Uhyve, when performing file read/write operations in the host, the target buffer is fully read or written from the guest physical memory. Such a design assumes that the buffer is contiguous in physical memory, which is not always the case. When using a buffer allocated with malloc, example of bad things that can happen are:
- Reading from a file in a buffer that is not or partially mapped to physical memory (because of on-demand mapping)
- Reading from a file in a buffer which virtual pages are not mapped to contiguous physical pages
- Writing in a file from a buffer which virtual pages are not mapped to contiguous physical pages
You can confirm these issues with these two sample programs and the corresponding Makefile:
/* read_test.c */
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#define PAGE_SIZE 4096
#define FILE_TO_READ "fread.bin"
#define FILE_SIZE PAGE_SIZE * 4
char verify_buf[FILE_SIZE];
int main(int argc, char **argv) {
int fd, verify_fd, i;
char *buf;
printf("Read test starting\n");
buf = malloc(FILE_SIZE);
if(!buf) {
fprintf(stderr, "error malloc!\n");
return -1;
}
verify_fd = open(FILE_TO_READ, O_RDONLY, 0x0);
fd = open(FILE_TO_READ, O_RDONLY, 0x0);
if(fd == -1 || verify_fd == -1) {
printf("error open\n");
return -1;
}
if(read(verify_fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
fprintf(stderr, "read error\n");
return -1;
}
if(read(fd, buf, FILE_SIZE) != FILE_SIZE) {
fprintf(stderr, "read error\n");
return -1;
}
printf("Read in malloc'd buffer done\n");
for(i=0; i<FILE_SIZE; i++)
if(buf[i] != verify_buf[i]) {
fprintf(stderr, "verification failed!\n");
fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
return -1;
}
printf("Read verification successful!\n");
free(buf);
close(fd);
return 0;
}/* write_test.c */
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#define PAGE_SIZE 4096
#define FILE_SIZE PAGE_SIZE * 4
#define FILE_TO_WRITE "fwrite.bin"
char verify_buf[FILE_SIZE];
int main(int argc, char **argv) {
char *buf;
int fd, i;
printf("write test starting\n");
buf = malloc(FILE_SIZE);
if(!buf) {
fprintf(stderr, "error malloc\n");
return -1;
}
fd = open(FILE_TO_WRITE, O_RDWR | O_CREAT | O_TRUNC, 0666);
if(fd == -1) {
fprintf(stderr, "error opening %s\n", FILE_TO_WRITE);
return -1;
}
for(i=0; i<FILE_SIZE; i++)
buf[i] = (i/4096) + '0';
if(write(fd, buf, FILE_SIZE) != FILE_SIZE) {
fprintf(stderr, "error writing in %s", FILE_TO_WRITE);
return -1;
}
printf("Write from malloc'd buffer done\n");
if(lseek(fd, 0x0, SEEK_SET) != 0) {
fprintf(stderr, "error lseek\n");
return -1;
}
if(read(fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
fprintf(stderr, "error read\n");
return -1;
}
for(i=0; i<FILE_SIZE; i++)
if(verify_buf[i] != buf[i]) {
fprintf(stderr, "verification failed!\n");
fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
return -1;
}
printf("Write verification successful!\n");
free(buf);
close(fd);
return 0;
}# Makefile
HERMIT_LOCAL_INSTALL=/home/pierre/Desktop/HermitCore/prefix/
HERMIT_TOOLCHAIN_INSTALL=/opt/hermit/
READ_TEST=read_test
WRITE_TEST=write_test
READ_SAMPLE_FILE=fread.bin
WRITE_SAMPLE_FILE=fwrite.bin
READ_TEST_SRC=$(READ_TEST).c
WRITE_TEST_SRC=$(WRITE_TEST).c
CFLAGS=-Wall -Werror
LDFLAGS=-L$(HERMIT_LOCAL_INSTALL)/x86_64-hermit/lib
CC=$(HERMIT_TOOLCHAIN_INSTALL)/bin/x86_64-hermit-gcc
PROXY=$(HERMIT_LOCAL_INSTALL)/bin/proxy
VERBOSE?=0
all: $(READ_TEST) $(WRITE_TEST)
$(READ_TEST): $(READ_TEST_SRC) $(READ_SAMPLE_FILE)
$(CC) $(CFLAGS) $(READ_TEST_SRC) -o $(READ_TEST) $(LDFLAGS)
$(WRITE_TEST): $(WRITE_TEST_SRC)
$(CC) $(CFLAGS) $(WRITE_TEST_SRC) -o $(WRITE_TEST) $(LDFLAGS)
$(READ_SAMPLE_FILE):
dd if=/dev/urandom of=$(READ_SAMPLE_FILE) bs=4K count=4
test: $(READ_TEST) $(WRITE_TEST)
HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(READ_TEST)
HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(WRITE_TEST)
clean:
rm -rf $(READ_TEST) $(WRITE_TEST) *.o $(READ_SAMPLE_FILE) \
$(WRITE_SAMPLE_FILE)Here is my proposed solution: a patch that forces the file read and write operations to be performed on a page-by-page basis, i.e. Uhyve file read or write operation is called for each page composing the buffer. I did not extensively tested it, nor did I evaluated the performance impact (I guess it slows things down a bit).
diff --git a/kernel/syscall.c b/kernel/syscall.c
index d10691d..f7b7b00 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -159,6 +159,47 @@ typedef struct {
ssize_t ret;
} __attribute__((packed)) uhyve_read_t;
+/* Pages belonging to the heap are mapped on demand, and are not always
+ * contiguous in physical memory. Thus, we need to force allocation and call
+ * the hypervisor file read function page by page.
+ */
+ssize_t uhyve_noncontiguous_read(int fd, char *buf, size_t len) {
+ ssize_t bytes_read = 0;
+ size_t cur_len = 0;
+ uhyve_read_t arg = {fd, 0x0, 0x0, -1};
+
+ while(len) {
+
+ if(!((size_t)buf % PAGE_SIZE)) {
+ cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+ } else {
+ cur_len = PAGE_CEIL((size_t)buf) - (size_t)buf;
+ cur_len = (((size_t)buf + len) > PAGE_CEIL((size_t)buf))
+ ? (PAGE_CEIL((size_t)buf) - (size_t)buf) : len;
+ }
+
+ /* Force mapping */
+ memset(buf, 0x0, 1);
+
+ arg.buf = (char *)virt_to_phys((size_t)buf);
+ arg.len = cur_len;
+ arg.ret = -1;
+ uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&arg));
+
+ if(arg.ret < 0)
+ return arg.ret;
+
+ bytes_read += arg.ret;
+ if(arg.ret != cur_len)
+ break;
+
+ buf += cur_len;
+ len -= cur_len;
+ }
+
+ return bytes_read;
+}
+
ssize_t sys_read(int fd, char* buf, size_t len)
{
sys_read_t sysargs = {__NR_read, fd, len};
@@ -174,11 +215,16 @@ ssize_t sys_read(int fd, char* buf, size_t len)
}
if (is_uhyve()) {
+
+ return uhyve_noncontiguous_read(fd, buf, len);
+
+#if 0
uhyve_read_t uhyve_args = {fd, (char*) virt_to_phys((size_t) buf), len, -1};
uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&uhyve_args));
return uhyve_args.ret;
+#endif
}
spinlock_irqsave_lock(&lwip_lock);
@@ -222,6 +268,39 @@ typedef struct {
size_t len;
} __attribute__((packed)) uhyve_write_t;
+ssize_t uhyve_noncontiguous_write(int fd, const char *buf, size_t len) {
+ char *cur_buf = (char *)buf;
+ ssize_t bytes_written = 0;
+ size_t cur_len = 0;
+ uhyve_write_t arg = {fd, 0x0, 0x0};
+
+ while(len) {
+
+ if(!((size_t)cur_buf % PAGE_SIZE)) {
+ cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+ } else {
+ cur_len = (((size_t)cur_buf + len) > PAGE_CEIL((size_t)cur_buf))
+ ? (PAGE_CEIL((size_t)cur_buf) - (size_t)cur_buf) : len;
+ }
+
+ arg.buf = (char *)virt_to_phys((size_t)cur_buf);
+ arg.len = cur_len;
+ uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&arg));
+
+ if(arg.len == (size_t)(-1))
+ return -1;
+
+ bytes_written += arg.len;
+ if(arg.len != cur_len)
+ break;
+
+ cur_buf += cur_len;
+ len -= cur_len;
+ }
+
+ return bytes_written;
+}
+
ssize_t sys_write(int fd, const char* buf, size_t len)
{
if (BUILTIN_EXPECT(!buf, 0))
@@ -240,11 +319,16 @@ ssize_t sys_write(int fd, const char* buf, size_t len)
}
if (is_uhyve()) {
+
+ return uhyve_noncontiguous_write(fd, buf, len);
+
+#if 0
uhyve_write_t uhyve_args = {fd, (const char*) virt_to_phys((size_t) buf), len};
uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&uhyve_args));
return uhyve_args.len;
+#endif
}
spinlock_irqsave_lock(&lwip_lock);