Skip to content
This repository was archived by the owner on Nov 24, 2021. It is now read-only.
This repository was archived by the owner on Nov 24, 2021. It is now read-only.

Uhyve file read/write should not assume target buffer is contiguous in physical memory #86

@olivierpierre

Description

@olivierpierre

Hello,

With Uhyve, when performing file read/write operations in the host, the target buffer is fully read or written from the guest physical memory. Such a design assumes that the buffer is contiguous in physical memory, which is not always the case. When using a buffer allocated with malloc, example of bad things that can happen are:

  • Reading from a file in a buffer that is not or partially mapped to physical memory (because of on-demand mapping)
  • Reading from a file in a buffer which virtual pages are not mapped to contiguous physical pages
  • Writing in a file from a buffer which virtual pages are not mapped to contiguous physical pages

You can confirm these issues with these two sample programs and the corresponding Makefile:

/* read_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE		4096
#define FILE_TO_READ	"fread.bin"
#define FILE_SIZE		PAGE_SIZE * 4

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
	int fd, verify_fd, i;
	char *buf;

	printf("Read test starting\n");

	buf = malloc(FILE_SIZE);
	if(!buf) {
		fprintf(stderr, "error malloc!\n");
		return -1;
	}

	verify_fd = open(FILE_TO_READ, O_RDONLY, 0x0);
	fd = open(FILE_TO_READ, O_RDONLY, 0x0);
	if(fd == -1 || verify_fd == -1) {
		printf("error open\n");
		return -1;
	}

	if(read(verify_fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "read error\n");
		return -1;
	}

	if(read(fd, buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "read error\n");
		return -1;
	}
	printf("Read in malloc'd buffer done\n");

	for(i=0; i<FILE_SIZE; i++)
		if(buf[i] != verify_buf[i]) {
			fprintf(stderr, "verification failed!\n");
			fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
			fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
			return -1;
		}

	printf("Read verification successful!\n");

	free(buf);
	close(fd);
	return 0;
}
/* write_test.c */

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE		4096
#define FILE_SIZE		PAGE_SIZE * 4
#define FILE_TO_WRITE	"fwrite.bin"

char verify_buf[FILE_SIZE];

int main(int argc, char **argv) {
	char *buf;
	int fd, i;

	printf("write test starting\n");

	buf = malloc(FILE_SIZE);
	if(!buf) {
		fprintf(stderr, "error malloc\n");
		return -1;
	}

	fd = open(FILE_TO_WRITE, O_RDWR | O_CREAT | O_TRUNC, 0666);
	if(fd == -1) {
		fprintf(stderr, "error opening %s\n", FILE_TO_WRITE);
		return -1;
	}

	for(i=0; i<FILE_SIZE; i++)
		buf[i] = (i/4096) + '0';

	if(write(fd, buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "error writing in %s", FILE_TO_WRITE);
		return -1;
	}
	printf("Write from malloc'd buffer done\n");

	if(lseek(fd, 0x0, SEEK_SET) != 0) {
		fprintf(stderr, "error lseek\n");
		return -1;
	}

	if(read(fd, verify_buf, FILE_SIZE) != FILE_SIZE) {
		fprintf(stderr, "error read\n");
		return -1;
	}

	for(i=0; i<FILE_SIZE; i++)
		if(verify_buf[i] != buf[i]) {
			fprintf(stderr, "verification failed!\n");
			fprintf(stderr, "buf[%d] == %x\n", i, buf[i]);
			fprintf(stderr, "verify_buf[%d] == %x\n", i, verify_buf[i]);
			return -1;
		}

	printf("Write verification successful!\n");

	free(buf);
	close(fd);
	return 0;
}
# Makefile

HERMIT_LOCAL_INSTALL=/home/pierre/Desktop/HermitCore/prefix/
HERMIT_TOOLCHAIN_INSTALL=/opt/hermit/
READ_TEST=read_test
WRITE_TEST=write_test
READ_SAMPLE_FILE=fread.bin
WRITE_SAMPLE_FILE=fwrite.bin
READ_TEST_SRC=$(READ_TEST).c
WRITE_TEST_SRC=$(WRITE_TEST).c
CFLAGS=-Wall -Werror
LDFLAGS=-L$(HERMIT_LOCAL_INSTALL)/x86_64-hermit/lib
CC=$(HERMIT_TOOLCHAIN_INSTALL)/bin/x86_64-hermit-gcc
PROXY=$(HERMIT_LOCAL_INSTALL)/bin/proxy
VERBOSE?=0

all: $(READ_TEST) $(WRITE_TEST)

$(READ_TEST): $(READ_TEST_SRC) $(READ_SAMPLE_FILE)
	$(CC) $(CFLAGS) $(READ_TEST_SRC) -o $(READ_TEST) $(LDFLAGS)

$(WRITE_TEST): $(WRITE_TEST_SRC)
	$(CC) $(CFLAGS) $(WRITE_TEST_SRC) -o $(WRITE_TEST) $(LDFLAGS)

$(READ_SAMPLE_FILE):
	dd if=/dev/urandom of=$(READ_SAMPLE_FILE) bs=4K count=4

test: $(READ_TEST) $(WRITE_TEST)
	HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(READ_TEST)
	HERMIT_ISLE=uhyve HERMIT_VERBOSE=$(VERBOSE) $(PROXY) $(WRITE_TEST)

clean:
	rm -rf $(READ_TEST) $(WRITE_TEST) *.o $(READ_SAMPLE_FILE) \
		$(WRITE_SAMPLE_FILE)

Here is my proposed solution: a patch that forces the file read and write operations to be performed on a page-by-page basis, i.e. Uhyve file read or write operation is called for each page composing the buffer. I did not extensively tested it, nor did I evaluated the performance impact (I guess it slows things down a bit).

diff --git a/kernel/syscall.c b/kernel/syscall.c
index d10691d..f7b7b00 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -159,6 +159,47 @@ typedef struct {
 	ssize_t ret;
 } __attribute__((packed)) uhyve_read_t;
 
+/* Pages belonging to the heap are mapped on demand, and are not always
+ * contiguous in physical memory. Thus, we need to force allocation and call
+ * the hypervisor file read function page by page.
+ */
+ssize_t uhyve_noncontiguous_read(int fd, char *buf, size_t len) {
+	ssize_t bytes_read = 0;
+	size_t cur_len = 0;
+	uhyve_read_t arg = {fd, 0x0, 0x0, -1};
+
+	while(len) {
+
+		if(!((size_t)buf % PAGE_SIZE)) {
+			cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+		} else {
+			cur_len = PAGE_CEIL((size_t)buf) - (size_t)buf;
+			cur_len = (((size_t)buf + len) > PAGE_CEIL((size_t)buf))
+				? (PAGE_CEIL((size_t)buf) - (size_t)buf) : len;
+		}
+
+		/* Force mapping */
+		memset(buf, 0x0, 1);
+
+		arg.buf = (char *)virt_to_phys((size_t)buf);
+		arg.len = cur_len;
+		arg.ret = -1;
+		uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&arg));
+
+		if(arg.ret < 0)
+			return arg.ret;
+
+		bytes_read += arg.ret;
+		if(arg.ret != cur_len)
+			break;
+
+		buf += cur_len;
+		len -= cur_len;
+	}
+
+	return bytes_read;
+}
+
 ssize_t sys_read(int fd, char* buf, size_t len)
 {
 	sys_read_t sysargs = {__NR_read, fd, len};
@@ -174,11 +215,16 @@ ssize_t sys_read(int fd, char* buf, size_t len)
 	}
 
 	if (is_uhyve()) {
+
+		return uhyve_noncontiguous_read(fd, buf, len);
+
+#if 0
 		uhyve_read_t uhyve_args = {fd, (char*) virt_to_phys((size_t) buf), len, -1};
 
 		uhyve_send(UHYVE_PORT_READ, (unsigned)virt_to_phys((size_t)&uhyve_args));
 
 		return uhyve_args.ret;
+#endif
 	}
 
 	spinlock_irqsave_lock(&lwip_lock);
@@ -222,6 +268,39 @@ typedef struct {
 	size_t len;
 } __attribute__((packed)) uhyve_write_t;
 
+ssize_t uhyve_noncontiguous_write(int fd, const char *buf, size_t len) {
+	char *cur_buf = (char *)buf;
+	ssize_t bytes_written = 0;
+	size_t cur_len = 0;
+	uhyve_write_t arg = {fd, 0x0, 0x0};
+
+	while(len) {
+
+		if(!((size_t)cur_buf % PAGE_SIZE)) {
+			cur_len = (len < PAGE_SIZE) ? len : PAGE_SIZE;
+		} else {
+			cur_len = (((size_t)cur_buf + len) > PAGE_CEIL((size_t)cur_buf))
+				? (PAGE_CEIL((size_t)cur_buf) - (size_t)cur_buf) : len;
+		}
+
+		arg.buf = (char *)virt_to_phys((size_t)cur_buf);
+		arg.len = cur_len;
+		uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&arg));
+
+		if(arg.len == (size_t)(-1))
+			return -1;
+
+		bytes_written += arg.len;
+		if(arg.len != cur_len)
+			break;
+
+		cur_buf += cur_len;
+		len -= cur_len;
+	}
+
+	return bytes_written;
+}
+
 ssize_t sys_write(int fd, const char* buf, size_t len)
 {
 	if (BUILTIN_EXPECT(!buf, 0))
@@ -240,11 +319,16 @@ ssize_t sys_write(int fd, const char* buf, size_t len)
 	}
 
 	if (is_uhyve()) {
+
+		return uhyve_noncontiguous_write(fd, buf, len);
+
+#if 0
 		uhyve_write_t uhyve_args = {fd, (const char*) virt_to_phys((size_t) buf), len};
 
 		uhyve_send(UHYVE_PORT_WRITE, (unsigned)virt_to_phys((size_t)&uhyve_args));
 
 		return uhyve_args.len;
+#endif
 	}
 
 	spinlock_irqsave_lock(&lwip_lock);

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions