Skip to content

Commit fab5207

Browse files
authored
faster filesize and read(::IOStream) (#35925)
1 parent 853fe04 commit fab5207

File tree

4 files changed

+67
-17
lines changed

4 files changed

+67
-17
lines changed

base/filesystem.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ import .Base:
4242
IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
4343
bytesavailable, position, read, read!, readavailable, seek, seekend, show,
4444
skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
45-
rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror
45+
rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
4646

4747
import .Base.RefValue
4848

base/iostream.jl

Lines changed: 35 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,12 @@ function position(s::IOStream)
219219
return pos
220220
end
221221

222+
function filesize(s::IOStream)
223+
sz = @_lock_ios s ccall(:ios_filesize, Int64, (Ptr{Cvoid},), s.ios)
224+
systemerror("filesize", sz == -1)
225+
return sz
226+
end
227+
222228
_eof_nolock(s::IOStream) = ccall(:ios_eof_blocking, Cint, (Ptr{Cvoid},), s.ios) != 0
223229
eof(s::IOStream) = @_lock_ios s _eof_nolock(s)
224230

@@ -441,9 +447,10 @@ function readbytes_all!(s::IOStream, b::Array{UInt8}, nb)
441447
lb = max(65536, (nr+1) * 2)
442448
resize!(b, lb)
443449
end
444-
nr += Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
445-
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
446-
_eof_nolock(s) && break
450+
thisr = Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
451+
s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
452+
nr += thisr
453+
(nr == nb || thisr == 0 || _eof_nolock(s)) && break
447454
end
448455
end
449456
if lb > olb && lb > nr
@@ -486,21 +493,33 @@ function readbytes!(s::IOStream, b::Array{UInt8}, nb=length(b); all::Bool=true)
486493
end
487494

488495
function read(s::IOStream)
489-
sz = try # filesize is just a hint, so ignore if `fstat` fails
490-
filesize(s)
491-
catch ex
492-
ex isa IOError || rethrow()
493-
Int64(0)
494-
end
495-
if sz > 0
496-
pos = position(s)
497-
if pos > 0
498-
sz -= pos
496+
# First we try to fill the buffer. If that gives us the whole file,
497+
# copy it out and return. Otherwise look at the file size and use it
498+
# to preallocate space. Determining the size requires extra syscalls,
499+
# which we want to avoid for small files.
500+
@_lock_ios s begin
501+
nb = ccall(:ios_fillbuf, Cssize_t, (Ptr{Cvoid},), s.ios)
502+
if nb != -1
503+
b = StringVector(nb)
504+
readbytes_all!(s, b, nb)
505+
else
506+
sz = try # filesize is just a hint, so ignore if it fails
507+
filesize(s)
508+
catch ex
509+
ex isa IOError || rethrow()
510+
Int64(-1)
511+
end
512+
if sz > 0
513+
pos = position(s)
514+
if pos > 0
515+
sz -= pos
516+
end
517+
end
518+
b = StringVector(sz < 0 ? 1024 : sz)
519+
nr = readbytes_all!(s, b, sz < 0 ? typemax(Int) : sz)
520+
resize!(b, nr)
499521
end
500522
end
501-
b = StringVector(sz <= 0 ? 1024 : sz)
502-
nr = readbytes_all!(s, b, typemax(Int))
503-
resize!(b, nr)
504523
return b
505524
end
506525

src/support/ios.c

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,17 @@ size_t ios_readprep(ios_t *s, size_t n)
367367
return (size_t)(s->size - s->bpos);
368368
}
369369

370+
// attempt to fill the buffer. returns the number of bytes available if we
371+
// have read the whole file, or -1 if there might be more data.
372+
ssize_t ios_fillbuf(ios_t *s)
373+
{
374+
size_t nb = s->maxsize - s->bpos;
375+
size_t got = ios_readprep(s, nb);
376+
if (got < nb)
377+
return (ssize_t)got;
378+
return -1;
379+
}
380+
370381
static void _write_update_pos(ios_t *s)
371382
{
372383
if (s->bpos > s->ndirty) s->ndirty = s->bpos;
@@ -535,6 +546,22 @@ int64_t ios_pos(ios_t *s)
535546
return fdpos;
536547
}
537548

549+
int64_t ios_filesize(ios_t *s)
550+
{
551+
if (s->fd == -1)
552+
return -1;
553+
int64_t fdpos = s->fpos;
554+
if (fdpos == (int64_t)-1) {
555+
fdpos = lseek(s->fd, 0, SEEK_CUR);
556+
if (fdpos == (int64_t)-1)
557+
return fdpos;
558+
s->fpos = fdpos;
559+
}
560+
off_t sz = lseek(s->fd, 0, SEEK_END);
561+
lseek(s->fd, (off_t)fdpos, SEEK_SET);
562+
return sz;
563+
}
564+
538565
int ios_trunc(ios_t *s, size_t size)
539566
{
540567
if (s->bm == bm_mem) {
@@ -936,6 +963,7 @@ ios_t *ios_file(ios_t *s, const char *fname, int rd, int wr, int create, int tru
936963
goto open_file_err;
937964

938965
s = ios_fd(s, fd, 1, 1);
966+
s->fpos = 0;
939967
if (!rd)
940968
s->readable = 0;
941969
if (!wr)

src/support/ios.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ JL_DLLEXPORT int64_t ios_seek(ios_t *s, int64_t pos) JL_NOTSAFEPOINT; // absolut
8989
JL_DLLEXPORT int64_t ios_seek_end(ios_t *s) JL_NOTSAFEPOINT;
9090
JL_DLLEXPORT int64_t ios_skip(ios_t *s, int64_t offs); // relative seek
9191
JL_DLLEXPORT int64_t ios_pos(ios_t *s) JL_NOTSAFEPOINT; // get current position
92+
JL_DLLEXPORT int64_t ios_filesize(ios_t *s);
9293
JL_DLLEXPORT int ios_trunc(ios_t *s, size_t size) JL_NOTSAFEPOINT;
9394
JL_DLLEXPORT int ios_eof(ios_t *s);
9495
JL_DLLEXPORT int ios_eof_blocking(ios_t *s);
@@ -108,6 +109,8 @@ JL_DLLEXPORT size_t ios_copyuntil(ios_t *to, ios_t *from, char delim) JL_NOTSAFE
108109
JL_DLLEXPORT size_t ios_nchomp(ios_t *from, size_t ntowrite);
109110
// ensure at least n bytes are buffered if possible. returns # available.
110111
JL_DLLEXPORT size_t ios_readprep(ios_t *from, size_t n);
112+
// fill the buffer and determine whether it contains the whole rest of the file
113+
JL_DLLEXPORT ssize_t ios_fillbuf(ios_t *s);
111114

112115
/* stream creation */
113116
JL_DLLEXPORT

0 commit comments

Comments
 (0)