Skip to content

Commit 6a5e7f7

Browse files
committed
Add use_cache option to create_files
1 parent 3ba0fa6 commit 6a5e7f7

File tree

2 files changed

+205
-69
lines changed

2 files changed

+205
-69
lines changed

src/fileio.jl

Lines changed: 125 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,79 @@
11
# This file is a part of ParallelProcessingTools.jl, licensed under the MIT License (MIT).
22

3+
"""
4+
ParallelProcessingTools.split_basename_ext(file_basename_with_ext::AbstractString)
5+
6+
Splits a filename (given without its directory path) into a basename without
7+
file extension and the file extension. Returns a tuple `(basename_noext, ext)`.
8+
9+
Example:
10+
11+
```
12+
ParallelProcessingTools.split_basename_ext("myfile.tar.gz") == ("myfile", ".tar.gz")
13+
```
14+
"""
15+
function split_basename_ext(bn_ext::AbstractString)
16+
ext_startpos = findfirst('.', bn_ext)
17+
bn, ext = isnothing(ext_startpos) ? (bn_ext, "") : (bn_ext[1:ext_startpos-1], bn_ext[ext_startpos:end])
18+
return bn, ext
19+
end
20+
321

422
"""
523
ParallelProcessingTools.tmp_filename(fname::AbstractString)
24+
ParallelProcessingTools.tmp_filename(fname::AbstractString, dir::AbstractString)
25+
26+
Returns a temporary filename, based on `fname`.
627
7-
Returns a temporary filename, based on `fname`, in the same directory.
28+
By default, the temporary filename is in the same directory as `fname`,
29+
otherwise in `dir`.
830
9-
Does *not* create the temporary file.
31+
Does *not* create the temporary file, only returns the filename (including
32+
directory path).
1033
"""
11-
function tmp_filename(fname::AbstractString)
12-
d, fn, ext = _split_dir_fn_ext(fname)
34+
function tmp_filename end
35+
36+
function tmp_filename(fname::AbstractString, dir::AbstractString)
37+
bn_ext = basename(fname)
38+
bn, ext = split_basename_ext(bn_ext)
1339
tag = _rand_fname_tag()
14-
joinpath(d, "$(fn)_$(tag)$(ext)")
15-
end
16-
17-
function _split_dir_fn_ext(fname::AbstractString)
18-
d = dirname(fname)
19-
f = basename(fname)
20-
ext_startpos = findfirst('.', f)
21-
fn, ext = isnothing(ext_startpos) ? (f, "") : (f[1:ext_startpos-1], f[ext_startpos:end])
22-
return d, fn, ext
40+
joinpath(dir, "$(bn)_$(tag)$(ext)")
2341
end
2442

43+
tmp_filename(fname::AbstractString) = tmp_filename(fname, dirname(fname))
44+
2545
_rand_fname_tag() = String(rand(b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", 8))
2646

2747

2848
"""
2949
function create_files(
30-
body, filenames::AbstractString...;
31-
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true
50+
f_write, filenames::AbstractString...;
51+
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true,
52+
use_cache::Bool = false, cache_dir::AbstractString = tempdir(),
53+
verbose::Bool = true
3254
)
3355
34-
Creates `filenames` in an atomic fashion.
56+
Creates `filenames` in an atomic fashion via a user-provided function
57+
`f_write`. Returns `nothing`.
58+
59+
Using temporary filenames, calls `f_write(temporary_filenames...)`. If
60+
`f_write` doesn't throw an exception, the files `temporary_filenames` are
61+
renamed to `filenames`. If `f_write` throws an exception, the temporary files
62+
are either deleted (if `delete_on_error` is `true`) or left in place (e.g. for
63+
debugging purposes).
3564
36-
Creates temporary files in the same directories as `filenames`, then
37-
calls `body(temporary_filenames...)`. If `body` returns successfully,
38-
the files `temporary_filenames` are renamed to `filenames`. If `body` throws
39-
an exception, the temporary files are either deleted (if `delete_on_error` is
40-
`true`) or left in place (e.g. for debugging purposes).
65+
If `create_dirs` is `true`, the `temporary_filenames` are created in
66+
`cache_dir` and then atomically moved to `filenames`, otherwise, they are
67+
created next to `filenames` (in the same directories).
4168
4269
If `create_dirs` is `true`, directories are created if necessary.
4370
44-
If all of files already exist and `overwrite` is `false`, takes no action
45-
(or, if the file is created by other code running in parallel, while `body` is
46-
running, does not overwrite it).
71+
If all of `filenames` already exist and `overwrite` is `false`, takes no
72+
action (or, on case the files are created by other code running in parallel,
73+
while `f_write` is running, does not replace them).
74+
75+
If `verbose` is `true`, uses log-level `Logging.Info` to log file creation,
76+
otherwise `Logging.Debug`.
4777
4878
Throws an error if only some of the files exist and `overwrite` is `false`.
4979
@@ -52,82 +82,117 @@ Returns `nothing`.
5282
Example:
5383
5484
```julia
55-
create_files("foo.txt", "bar.txt") do foo, bar
85+
create_files("foo.txt", "bar.txt", use_cache = true) do foo, bar
5686
write(foo, "Hello")
5787
write(bar, "World")
5888
end
5989
```
90+
91+
Set `ENV["JULIA_DEBUG"] = "ParallelProcessingTools"` to see a log of all
92+
intermediate steps.
93+
94+
On Linux you can set `use_cache = true` and `cache_dir = "/dev/shm"` to use
95+
the default Linux RAM disk as an intermediate directory.
6096
"""
6197
function create_files(
62-
body, filenames::AbstractString...;
63-
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true
98+
@nospecialize(f_write), @nospecialize(filenames::AbstractString...);
99+
create_dirs::Bool = true, overwrite::Bool = true, delete_on_error::Bool=true,
100+
use_cache::Bool = false, cache_dir::AbstractString = tempdir(),
101+
verbose::Bool = true
64102
)
65-
tmp_filenames = String[]
66-
completed_filenames = String[]
103+
loglevel = verbose ? Info : Debug
104+
105+
target_fnames = String[filenames...] # Fix type
106+
staging_fnames = String[]
107+
writeto_fnames = String[]
108+
completed_fnames = String[]
67109

68-
pre_existing = isfile.(filenames)
110+
pre_existing = isfile.(target_fnames)
69111
if any(pre_existing)
70112
if all(pre_existing)
71113
if !overwrite
72-
@info "Files $filenames already exist, nothing to do."
114+
@logmsg loglevel "Files $target_fnames already exist, nothing to do."
73115
return nothing
74116
end
75117
else
76-
!overwrite && throw(ErrorException("Only some of $filenames exist but not allowed to overwrite"))
118+
!overwrite && throw(ErrorException("Only some of $target_fnames exist but not allowed to overwrite"))
77119
end
78120
end
79121

80-
dirs = dirname.(filenames)
81-
for dir in dirs
82-
if !isdir(dir) && create_dirs
83-
mkpath(dir)
84-
@info "Created directory $dir."
122+
dirs = dirname.(target_fnames)
123+
if create_dirs
124+
for dir in dirs
125+
if !isdir(dir) && create_dirs
126+
mkpath(dir)
127+
@logmsg loglevel "Created directory $dir."
128+
end
129+
end
130+
131+
if use_cache && !isdir(cache_dir)
132+
mkpath(cache_dir)
133+
@logmsg loglevel "Created cache directory $cache_dir."
85134
end
86135
end
87136

88137
try
89-
for fname in filenames
90-
tmp_fname = tmp_filename(fname)
91-
@assert !isfile(tmp_fname)
92-
push!(tmp_filenames, tmp_fname)
93-
end
138+
staging_fnames = tmp_filename.(target_fnames)
139+
@assert !any(isfile, staging_fnames)
140+
141+
writeto_fnames = use_cache ? tmp_filename.(target_fnames, Ref(cache_dir)) : staging_fnames
142+
@assert !any(isfile, writeto_fnames)
94143

95-
body(tmp_filenames...)
144+
@debug "Creating intermediate files $writeto_fnames."
145+
f_write(writeto_fnames...)
96146

97-
post_body_existing = isfile.(filenames)
98-
if any(post_body_existing)
99-
if all(post_body_existing)
147+
post_f_write_existing = isfile.(target_fnames)
148+
if any(post_f_write_existing)
149+
if all(post_f_write_existing)
100150
if !overwrite
101-
@info "Files $filenames already exist, won't replace."
151+
@logmsg loglevel "Files $target_fnames already exist, won't replace."
102152
return nothing
103153
end
104154
else
105-
!overwrite && throw(ErrorException("Only some of $filenames exist but not allowed to replace files"))
155+
!overwrite && throw(ErrorException("Only some of $target_fnames exist but not allowed to replace files"))
106156
end
107157
end
108-
158+
109159
try
110-
for (tmp_fname, fname) in zip(tmp_filenames, filenames)
111-
mv(tmp_fname, fname; force=true)
112-
@assert isfile(fname)
113-
push!(completed_filenames, fname)
160+
if use_cache
161+
for (writeto_fn, staging_fn) in zip(writeto_fnames, staging_fnames)
162+
@assert writeto_fn != staging_fn
163+
@debug "Moving file \"$writeto_fn\" to \"$staging_fn\"."
164+
isfile(writeto_fn) || error("Expected file \"$writeto_fn\" to exist, but it doesn't.")
165+
mv(writeto_fn, staging_fn; force=true)
166+
isfile(staging_fn) || error("Tried to move file \"$writeto_fn\" to \"$staging_fn\", but \"$staging_fn\" doesn't exist.")
167+
end
168+
end
169+
for (staging_fn, target_fn) in zip(staging_fnames, target_fnames)
170+
@assert staging_fn != target_fn
171+
@debug "Renaming file \"$staging_fn\" to \"$target_fn\"."
172+
isfile(staging_fn) || error("Expected file \"$staging_fn\" to exist, but it doesn't.")
173+
mv(staging_fn, target_fn; force=true)
174+
isfile(target_fn) || error("Tried to rename file \"$staging_fn\" to \"$target_fn\", but \"$target_fn\" doesn't exist.")
175+
push!(completed_fnames, target_fn)
114176
end
115-
@info "Successfully created files $filenames."
177+
@logmsg loglevel "Created files $target_fnames."
116178
catch
117-
if !isempty(completed_filenames)
118-
@error "Failed to rename some temporary files to final filenames, removing $completed_filenames"
119-
for fname in completed_filenames
179+
if !isempty(completed_fnames)
180+
@error "Failed to rename some temporary files to final filenames, removing $completed_fnames"
181+
for fname in completed_fnames
120182
rm(fname; force=true)
121183
end
122184
end
123185
rethrow()
124186
end
125187

126-
@assert all(fn -> !isfile(fn), tmp_filenames)
188+
@assert all(fn -> !isfile(fn), staging_fnames)
127189
finally
128190
if delete_on_error
129-
for tmp_fname in tmp_filenames
130-
isfile(tmp_fname) && rm(tmp_fname; force=true);
191+
for writeto_fn in writeto_fnames
192+
isfile(writeto_fn) && rm(writeto_fn; force=true);
193+
end
194+
for staging_fn in staging_fnames
195+
isfile(staging_fn) && rm(staging_fn; force=true);
131196
end
132197
end
133198
end

test/test_fileio.jl

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,91 @@
33
using Test
44
using ParallelProcessingTools
55

6+
using ParallelProcessingTools: split_basename_ext, tmp_filename
7+
8+
old_julia_debug = get(ENV, "JULIA_DEBUG", "")
9+
ENV["JULIA_DEBUG"] = old_julia_debug * ",ParallelProcessingTools"
10+
611

712
@testset "fileio" begin
8-
mktempdir() do dir
9-
data1 = "Hello"
10-
data2 = "World"
13+
@testset "split_basename_ext" begin
14+
@test @inferred(split_basename_ext("foo_bar baz.tar.gz")) == ("foo_bar baz", ".tar.gz")
15+
end
1116

12-
fn1 = joinpath(dir, "hello.txt")
13-
fn2 = joinpath(dir, "world.txt")
17+
@testset "tmp_filename" begin
18+
dir = joinpath("foo", "bar")
19+
tmpdir = joinpath(tempdir(), "somedir")
20+
bn = "test.tar.gz"
21+
fn = joinpath(dir, bn)
1422

15-
create_files(fn1, fn2) do fn1, fn2
16-
write(fn1, data1)
17-
write(fn2, data2)
23+
@test @inferred(tmp_filename(fn)) isa AbstractString
24+
let tmpfn = @inferred tmp_filename(fn)
25+
@test dirname(tmpfn) == dir
26+
tmp_bn, tmp_ex = split_basename_ext(basename(tmpfn))
27+
@test startswith(tmp_bn, "test_")
28+
@test tmp_ex == ".tar.gz"
1829
end
1930

20-
@test read(fn1, String) == data1 && read(fn2, String) == data2
31+
@test @inferred(tmp_filename(fn, tmpdir)) isa AbstractString
32+
let tmpfn = @inferred tmp_filename(fn, tmpdir)
33+
@test dirname(tmpfn) == tmpdir
34+
tmp_bn, tmp_ex = split_basename_ext(basename(tmpfn))
35+
@test startswith(tmp_bn, "test_")
36+
@test tmp_ex == ".tar.gz"
37+
end
38+
end
39+
40+
for use_cache in [false, true]
41+
@testset "create_files" begin
42+
mktempdir() do dir
43+
data1 = "Hello"
44+
data2 = "World"
45+
46+
fn1 = joinpath(dir, "targetdir", "hello.txt")
47+
fn2 = joinpath(dir, "targetdir", "world.txt")
48+
49+
# Target directory does not exist yet:
50+
try
51+
# Will not create missing target directory:
52+
create_files(fn1, fn2, use_cache = use_cache, create_dirs = false, verbose = true) do fn1, fn2
53+
write(fn1, data1); write(fn2, data2)
54+
end
55+
@test false # Should have thrown an exception
56+
catch err
57+
@test err isa SystemError || err isa Base.IOError
58+
end
59+
60+
# Test atomicity, fail in between writing files:
61+
@test_throws ErrorException create_files(fn1, fn2, use_cache = use_cache, verbose = true) do fn1, fn2
62+
write(fn1, data1)
63+
error("Some error")
64+
write(fn2, data2)
65+
end
66+
@test !isfile(fn1) && !isfile(fn2)
67+
68+
# Will create:
69+
create_files(fn1, fn2, use_cache = use_cache, verbose = true) do fn1, fn2
70+
write(fn1, data1); write(fn2, data2)
71+
end
72+
@test read(fn1, String) == data1 && read(fn2, String) == data2
73+
74+
# Modify the target files:
75+
write(fn1, "dummy content"); write(fn2, "dummy content");
76+
77+
# Wont't overwrite:
78+
create_files(fn1, fn2, use_cache = use_cache, overwrite = false, verbose = true) do fn1, fn2
79+
write(fn1, data1); write(fn2, data2)
80+
end
81+
@test read(fn1, String) != data1 && read(fn2, String) != data2
82+
83+
# Will overwrite:
84+
create_files(fn1, fn2, use_cache = use_cache, verbose = true) do fn1, fn2
85+
write(fn1, data1); write(fn2, data2)
86+
end
87+
@test read(fn1, String) == data1 && read(fn2, String) == data2
88+
end
89+
end
2190
end
2291
end
92+
93+
ENV["JULIA_DEBUG"] = old_julia_debug; nothing

0 commit comments

Comments
 (0)