Skip to content

Commit 9fb14af

Browse files
authored
FasterAI (#151)
* Add dataset recipes and dataset registry * Add fastai dataset registry with some recipes * move `typify` helper * add learning method registry * add test for `ImageSegmentationFolders` recipe * add missing `mockblock` for `OneHotMulti` * update learning methods * add convenience `plotpredictions` method * Add multi-label recipe * add some tests for query functions
1 parent 777b6bf commit 9fb14af

File tree

11 files changed

+83
-35
lines changed

11 files changed

+83
-35
lines changed

CHANGELOG.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7-
## [Unreleased] - 2020-03-08 – now
7+
## [Unreleased] - 2020-07-32 – now
88

99
### Added
10-
- `plotlrfind` to visualize results of `LRFinderPhase`
10+
- High-level API "FasterAI"
11+
- Find datasets and learning methods based on `Block`s: `finddataset`, `findlearningmethods`
12+
- `loaddataset` for quickly loading data containers from configured recipes
13+
- Data container recipes (`DatasetRecipe`, `loadrecipe`)
1114

12-
### Changed
13-
- Documentation notebooks to reflect changes in API
15+
### Changed

src/datablock/wrappers.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ end
3434
Wrapper `Block` to attach a name to a block. Can be used in conjunction
3535
with [`Only`](#) to apply encodings to specific blocks only.
3636
"""
37-
struct Named{Name, B<:Block} <: WrapperBlock
37+
struct Named{Name, B<:AbstractBlock} <: WrapperBlock
3838
block::B
3939
end
40-
Named(name::Symbol, block::B) where {B<:Block} = Named{name, B}(block)
40+
Named(name::Symbol, block::B) where {B<:AbstractBlock} = Named{name, B}(block)
4141

4242

4343
# the name is preserved through encodings and decodings

src/datasets/fastairegistry.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11

22
const FASTAI_DATA_RECIPES = Dict{String, Vector{DatasetRecipe}}(
33
# Image classification datasets
4-
[name => [ImageClassificationFolders()] for name in (
4+
[name => [ImageFolders()] for name in (
55
"imagenette", "imagenette-160", "imagenette-320",
66
"imagenette2", "imagenette2-160", "imagenette2-320",
77
"imagewoof", "imagewoof-160", "imagewoof-320",
88
"imagewoof2", "imagewoof2-160", "imagewoof2-320",
99
)]...,
1010

1111
"camvid_tiny" => [ImageSegmentationFolders()],
12+
"pascal_2007" => [ImageTableMultiLabel()],
1213
)
1314

1415

src/datasets/recipes.jl

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ recipeblocks(::R) where {R<:DatasetRecipe} = recipeblocks(R)
4747
# ImageFolders
4848

4949
"""
50-
ImageClfFolders(; labelfn = parentname, split = false)
50+
ImageFolders(; labelfn = parentname, split = false)
5151
5252
Recipe for loading a single-label image classification dataset
5353
stored in a hierarchical folder format. If `split == true`, split
@@ -56,16 +56,16 @@ defaults to the name of the parent folder but a custom function can
5656
be passed as `labelfn`.
5757
5858
```julia
59-
julia> recipeblocks(ImageClassificationFolders)
59+
julia> recipeblocks(ImageFolders)
6060
Tuple{Image{2}, Label}
6161
```
6262
"""
63-
Base.@kwdef struct ImageClassificationFolders <: DatasetRecipe
63+
Base.@kwdef struct ImageFolders <: DatasetRecipe
6464
labelfn = parentname
6565
split::Bool = false
6666
end
6767

68-
function loadrecipe(recipe::ImageClassificationFolders, path)
68+
function loadrecipe(recipe::ImageFolders, path)
6969
isdir(path) || error("$path is not a directory")
7070
data = loadfolderdata(
7171
path,
@@ -81,7 +81,7 @@ function loadrecipe(recipe::ImageClassificationFolders, path)
8181
return data, blocks
8282
end
8383

84-
recipeblocks(::Type{ImageClassificationFolders}) = Tuple{Image{2}, Label}
84+
recipeblocks(::Type{ImageFolders}) = Tuple{Image{2}, Label}
8585

8686

8787
# ImageSegmentationFolders
@@ -124,3 +124,37 @@ function loadrecipe(recipe::ImageSegmentationFolders, path)
124124
end
125125

126126
recipeblocks(::Type{ImageSegmentationFolders}) = Tuple{Image{2}, Mask{2}}
127+
128+
# ImageTableMultiLabel
129+
130+
"""
    ImageTableMultiLabel(; csvfile = "train.csv", imagefolder = "train",
                           filecol = :fname, labelcol = :labels,
                           split = false, splitcol = :is_valid, labelsep = " ")

Recipe for loading a multi-label image classification dataset that is
described by a table. The table is read from `csvfile` (relative to the
dataset path). Column `filecol` holds image file names relative to
`imagefolder`, and column `labelcol` holds each image's labels as one
string separated by `labelsep`.

If `split == true`, the observations are split into a training and a
validation set based on column `splitcol`: rows where it is `true` make
up the validation set, all other rows the training set.

```julia
julia> recipeblocks(ImageTableMultiLabel)
Tuple{Image{2}, LabelMulti}
```
"""
Base.@kwdef struct ImageTableMultiLabel <: DatasetRecipe
    csvfile::String = "train.csv"
    imagefolder::String = "train"
    filecol::Symbol = :fname
    labelcol::Symbol = :labels
    split::Bool = false
    splitcol::Symbol = :is_valid
    labelsep::String = " "
end


"""
    loadrecipe(recipe::ImageTableMultiLabel, path)

Load the table `recipe.csvfile` found under `path` and return `(data, blocks)`.

`data` is a tuple `(images, labels)` where images are loaded lazily from
`joinpath(path, recipe.imagefolder, filename)` and every label entry is the
result of splitting the label string on `recipe.labelsep`. If `recipe.split`
is set, `data` is instead a `Dict` with the keys `"train"` and `"valid"`,
each holding a subset of that tuple.

`blocks` is `(Image{2}(), LabelMulti(classes))` with `classes` being the
unique labels that occur in the table.

Throws an `ErrorException` if the table file does not exist.
"""
function loadrecipe(recipe::ImageTableMultiLabel, path)
    csvpath = joinpath(path, recipe.csvfile)
    isfile(csvpath) || error("File $csvpath does not exist")
    df = loadfile(csvpath)
    images = mapobs(
        f -> loadfile(joinpath(path, recipe.imagefolder, f)),
        df[:, recipe.filecol],
    )
    labels = map(str -> split(str, recipe.labelsep), df[:, recipe.labelcol])
    data = (images, labels)
    blocks = Image{2}(), LabelMulti(unique(Iterators.flatten(labels)))
    if recipe.split
        idxs = 1:nobs(data)
        # The split column flags *validation* rows (default name `:is_valid`),
        # so flagged rows go to "valid" and the remaining rows to "train".
        isvalid = df[:, recipe.splitcol]
        data = Dict(
            "train" => datasubset(data, idxs[(!).(isvalid)]),
            "valid" => datasubset(data, idxs[isvalid]),
        )
    end
    return data, blocks
end


recipeblocks(::Type{ImageTableMultiLabel}) = Tuple{Image{2}, LabelMulti}

src/datasets/transformations.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ struct MappedData{F, D}
66
data::D
77
end
88

9-
Base.show(io::IO, data::MappedData) = print(io, "mapobs($(data.f), $(data.data))")
9+
Base.show(io::IO, data::MappedData) = print(io, "mapobs($(data.f), $(summary(data.data)))")
1010
Base.show(io::IO, data::MappedData{F, <:AbstractArray}) where F = print(io, "mapobs($(data.f), $(ShowLimit(data.data, limit=80)))")
1111
LearnBase.nobs(data::MappedData) = nobs(data.data)
1212
LearnBase.getobs(data::MappedData, idx::Int) = data.f(getobs(data.data, idx))

test/datablock.jl

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,6 @@
1-
#include("imports.jl")
2-
using FastAI
3-
import FastAI: Block, Encoding, encode, decode, checkblock, encodedblock, decodedblock
4-
using FastAI: Label, LabelMulti, Mask, Image, ImageTensor, testencoding
5-
using FastAI: OneHot
6-
using Test
7-
using StaticArrays
8-
using Images
9-
using FastAI: grabbounds
10-
using Images
11-
12-
##
1+
include("imports.jl")
2+
3+
134
struct ABlock <: Block
145
end
156
checkblock(::ABlock, ::Int) = true
@@ -69,9 +60,7 @@ end
6960
testencoding(enc, block, image)
7061
@testset "randstate is shared" begin
7162
im1, im2 = encode(enc, Training(), (block, block), (image, image))
72-
im3 = encode(enc, Training(), block, image)
7363
@test im1 ≈ im2
74-
@test !(im1 == im3)
7564
end
7665

7766
@testset "don't transform data that doesn't need to be resized" begin

test/datasets/recipes.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,31 +15,31 @@ function testrecipe(recipe::Datasets.DatasetRecipe, data, blocks)
1515
end
1616

1717

18-
@testset ExtendedTestSet "ImageClassificationFolders" begin
18+
@testset ExtendedTestSet "ImageFolders" begin
1919
path = joinpath(datasetpath("mnist_var_size_tiny"), "train")
2020

2121
@testset ExtendedTestSet "Basic configuration" begin
22-
recipe = Datasets.ImageClassificationFolders()
22+
recipe = Datasets.ImageFolders()
2323
data, blocks = loadrecipe(recipe, path)
2424
testrecipe(recipe, data, blocks)
2525
@test blocks[1] isa Image
2626
@test blocks[2].classes == ["3", "7"]
2727
end
2828

2929
@testset ExtendedTestSet "Split configuration" begin
30-
recipe = Datasets.ImageClassificationFolders(split=true)
30+
recipe = Datasets.ImageFolders(split=true)
3131
data, blocks = loadrecipe(recipe, path)
3232
testrecipe(recipe, data["train"], blocks)
3333
end
3434

3535
@testset ExtendedTestSet "Error cases" begin
3636
@testset ExtendedTestSet "Empty directory" begin
37-
recipe = Datasets.ImageClassificationFolders(split=true)
37+
recipe = Datasets.ImageFolders(split=true)
3838
@test_throws ErrorException loadrecipe(recipe, mktempdir())
3939
end
4040

4141
@testset ExtendedTestSet "Only one label" begin
42-
recipe = Datasets.ImageClassificationFolders(labelfn=x -> "1")
42+
recipe = Datasets.ImageFolders(labelfn=x -> "1")
4343
@test_throws ErrorException loadrecipe(recipe, path)
4444
end
4545
end

test/datasets/registry.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ include("../imports.jl")
2323

2424
@testset ExtendedTestSet "registerrecipe!" begin
2525
@test_nowarn Datasets.registerrecipe!(
26-
reg, "mnist_var_size_tiny", Datasets.ImageClassificationFolders())
26+
reg, "mnist_var_size_tiny", Datasets.ImageFolders())
2727
end
2828

2929
@testset ExtendedTestSet "finddatasets" begin

test/fasterai.jl

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
include("imports.jl")
2+
3+
4+
5+
@testset ExtendedTestSet "FasterAI" begin
6+
@test length(listdatasources()) > 10
7+
8+
@test !isempty(finddatasets(blocks=(Image, Label)))
9+
@test !isempty(finddatasets(blocks=(Image, LabelMulti)))
10+
@test !isempty(finddatasets(blocks=(Image, Mask)))
11+
12+
@test ImageClassificationSingle ∈ findlearningmethods((Image, Label))
13+
@test ImageClassificationMulti ∈ findlearningmethods((Image, LabelMulti))
14+
end

test/imports.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
using Colors: RGB, N0f8, Gray
33
using FastAI
44
using FastAI: ParamGroups, IndexGrouper, getgroup, DiscriminativeLRs, decay_optim
5-
using FastAI: Image, Keypoints, Mask, testencoding, Label, OneHot, ProjectiveTransforms,
6-
encodedblock, decodedblock, encode, decode, mockblock
5+
import FastAI: Image, Keypoints, Mask, testencoding, Label, OneHot, ProjectiveTransforms,
6+
encodedblock, decodedblock, encode, decode, mockblock, checkblock, Block, Encoding
77
using FilePathsBase
88
using FastAI.Datasets
99
using DLPipelines

0 commit comments

Comments
 (0)