Commit 7a94a0f

Lecture 10: Scripts

1 parent e2dd221 commit 7a94a0f

5 files changed, +266 -47 lines

scripts/lecture_10/Project.toml

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,7 @@ Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 ImageInspector = "b0ce21f1-0238-464b-b95f-8a4068743199"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
+RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

@@ -12,4 +13,5 @@ BSON = "= 0.2.6"
 Flux = "= 0.11.6"
 MLDatasets = "= 0.5.6"
 Plots = "= 1.10.3"
+RDatasets = "= 0.7.4"
 julia = "1.5"
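The two added lines register RDatasets under [deps] and pin it to 0.7.4 under [compat]. To run the lecture scripts against these pinned versions, the environment would typically be activated first; a minimal sketch, assuming the commands are issued from the repository root (the path is illustrative):

using Pkg

Pkg.activate("scripts/lecture_10")  # use the pinned Project.toml above
Pkg.instantiate()                   # install the exact pinned versions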

scripts/lecture_10/script.jl

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
+using Base.Iterators: partition
+using Random
+using BSON
+using Statistics
+using MLDatasets
+using Flux
+using Flux: onehotbatch, onecold, crossentropy
+using Flux.Data: DataLoader
+using Plots
+using ImageInspector
+import RDatasets: dataset
+
+# Introduction to Flux
+
+include("utilities.jl")
+
+Random.seed!(666)
+
+iris = dataset("datasets", "iris")
+
+X = Matrix(iris[:, 1:4])
+y = iris.Species
+
+X_train, y_train, X_test, y_test, classes = prepare_data(X', y; dims=2)
+
+n_hidden = 5
+m = Chain(
+    Dense(size(X_train,1), n_hidden, relu),
+    Dense(n_hidden, size(y_train,1), identity),
+    softmax,
+)
+
+m(X_train)
+
+params(m[2])[2] .= [-1;0;1]
+
+L(x,y) = crossentropy(m(x), y)
+
+L(X_train, y_train)
+
+ps = params(m)
+grad = gradient(() -> L(X_train, y_train), ps)
+
+grad = gradient(() -> L(X_train, y_train), params(X_train))
+
+size(grad[X_train])
+
+opt = Descent(0.1)
+max_iter = 250
+
+acc_test = zeros(max_iter)
+for i in 1:max_iter
+    gs = gradient(() -> L(X_train, y_train), ps)
+    Flux.Optimise.update!(opt, ps, gs)
+    acc_test[i] = accuracy(X_test, y_test)
+end
+
+plot(acc_test, xlabel="Iteration", ylabel="Test accuracy", label="", ylim=(-0.01,1.01))
+
+# Loading data
+
+T = Float32
+X_train, y_train = MLDatasets.MNIST.traindata(T)
+X_test, y_test = MLDatasets.MNIST.testdata(T)
+
+# Exercise
+
+
+
+# Exercise
+
+
+
+# Loading data
+
+function load_data(dataset; T=Float32, onehot=false, classes=0:9)
+    X_train, y_train = dataset.traindata(T)
+    X_test, y_test = dataset.testdata(T)
+
+    X_train = reshape_data(X_train)
+    X_test = reshape_data(X_test)
+
+    if onehot
+        y_train = onehotbatch(y_train, classes)
+        y_test = onehotbatch(y_test, classes)
+    end
+
+    return X_train, y_train, X_test, y_test
+end
+
+X_train, y_train, X_test, y_test = load_data(MLDatasets.MNIST; T=T, onehot=true)
+
+# Exercise
+
+
+
+# Exercise
+
+
+
+# Bonus
+
+batches = map(partition(randperm(size(y, 2)), batchsize)) do inds
+    return (X[:, :, :, inds], y[:, inds])
+end
+
+[(X[:, :, :, inds], y[:, inds]) for inds in partition(randperm(size(y, 2)), batchsize)]
+
+# Define model
+
+Random.seed!(666)
+m = Chain(
+    Conv((2,2), 1=>16, relu),
+    MaxPool((2,2)),
+    Conv((2,2), 16=>8, relu),
+    MaxPool((2,2)),
+    flatten,
+    Dense(288, size(y_train,1)),
+    softmax,
+)
+
+L(X, y) = crossentropy(m(X), y)
+
+# Train model
+
+function train_model!(m, L, X, y;
+    opt = Descent(0.1),
+    batchsize = 128,
+    n_epochs = 10,
+    file_name = "")
+
+    batches = DataLoader((X, y); batchsize, shuffle = true)
+
+    for _ in 1:n_epochs
+        Flux.train!(L, params(m), batches, opt)
+    end
+
+    !isempty(file_name) && BSON.bson(file_name, m=m)
+
+    return
+end
+
+# Exercise
+
+
+
+# Exercise
+
+
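Two notes on the script above. The Bonus block refers to X, y, and batchsize, which are supplied during the live session rather than defined earlier in the file. And train_model! mutates m in place, trains over shuffled DataLoader mini-batches for n_epochs, and saves the model via BSON when file_name is nonempty. A minimal usage sketch, assuming the MNIST data and CNN above; accuracy_mnist and the file name are illustrative additions (the accuracy in utilities.jl is written for the iris part):

# Hypothetical helper: fraction of test digits whose predicted class matches.
accuracy_mnist(x, y) = mean(onecold(m(x)) .== onecold(y))

train_model!(m, L, X_train, y_train; n_epochs=1, file_name="mnist_simple.bson")
accuracy_mnist(X_test, y_test)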

scripts/lecture_10/script_init.jl

Lines changed: 14 additions & 46 deletions
@@ -12,37 +12,22 @@ using Flux: onehotbatch, onecold, crossentropy
 using Flux.Data: DataLoader
 using Plots
 using ImageInspector
+import RDatasets: dataset

-function reshape_data(X::AbstractArray{<:Real, 3})
-    s = size(X)
-    return reshape(X, s[1], s[2], 1, s[3])
-end
+plot(1:10)

-reshape_data(X::AbstractArray{<:Real, 4}) = X
-
-function load_data(dataset; T=Float32, onehot=false, classes=0:9)
-    X_train, y_train = dataset.traindata(T)
-    X_test, y_test = dataset.testdata(T)
-
-    X_train = reshape_data(X_train)
-    X_test = reshape_data(X_test)
-
-    if onehot
-        y_train = onehotbatch(y_train, classes)
-        y_test = onehotbatch(y_test, classes)
-    end
-
-    return X_train, y_train, X_test, y_test
-end
+iris = dataset("datasets", "iris")

 T = Float32
-X_train, y_train, X_test, y_test = load_data(MLDatasets.MNIST; T=T, onehot=true);
-load_data(MLDatasets.CIFAR10; T=T, onehot=true);
+X_train, y_train = MLDatasets.MNIST.traindata(T)
+X_train = reshape(X_train, size(X_train,1), size(X_train,2), 1, size(X_train,3))
+y_train = onehotbatch(y_train, 0:9)
+MLDatasets.MNIST.testdata(T)
+MLDatasets.CIFAR10.traindata(T)

-inds = findall(y_train .== 0)[1:15]
-imageplot(1 .- X_train, inds; nrows=3, size=(800,480))
+imageplot(1 .- X_train, 1:3; nrows=1, size=(800,480))

-m = Chain(
+m_aux = Chain(
     Conv((2,2), 1=>16, relu),
     MaxPool((2,2)),
     Conv((2,2), 16=>8, relu),
@@ -52,27 +37,10 @@ m = Chain(
     softmax,
 )

-L(X, y) = crossentropy(m(X), y)
-
-function train_model!(m, L, X, y;
-    opt = Descent(0.1),
-    batchsize = 128,
-    n_epochs = 10,
-    file_name = "")
-
-    batches = DataLoader((X, y); batchsize, shuffle = true)
-
-    for _ in 1:n_epochs
-        Flux.train!(L, params(m), batches, opt)
-    end
-
-    !isempty(file_name) && BSON.bson(file_name, m=m)
-
-    return
-end
+L_aux(X, y) = crossentropy(m_aux(X), y)

-train_model!(m, L, X_train, y_train; n_epochs=1)
+batches_aux = DataLoader((X_train, y_train); batchsize=64, shuffle = true)

-accuracy(x, y) = mean(onecold(m(x)) .== onecold(y))
+gradient(() -> L_aux(X_train[:,:,:,1:10], y_train[:,1:10]), params(m_aux))

-accuracy(X_test, y_test)
+onecold(m_aux(X_train[:,:,:,1:10]))
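The rewritten init script exercises plotting, RDatasets, MNIST loading, imageplot, a convolutional forward pass, and one gradient call, so first-call compilation happens before the lecture starts. A small sketch of what iterating batches_aux yields, assuming the MNIST shapes above (the sizes in the comment are expectations, not captured output):

for (x, y) in batches_aux
    # expected: size(x) == (28, 28, 1, 64), size(y) == (10, 64)
    @show size(x) size(y)
    break
end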

scripts/lecture_10/script_sol.jl

Lines changed: 51 additions & 1 deletion
@@ -8,6 +8,56 @@ using Flux: onehotbatch, onecold, crossentropy
 using Flux.Data: DataLoader
 using Plots
 using ImageInspector
+import RDatasets: dataset
+
+# Introduction to Flux
+
+include("utilities.jl")
+
+Random.seed!(666)
+
+iris = dataset("datasets", "iris")
+
+X = Matrix(iris[:, 1:4])
+y = iris.Species
+
+X_train, y_train, X_test, y_test, classes = prepare_data(X', y; dims=2)
+
+n_hidden = 5
+m = Chain(
+    Dense(size(X_train,1), n_hidden, relu),
+    Dense(n_hidden, size(y_train,1), identity),
+    softmax,
+)
+
+m(X_train)
+
+params(m[2])[2] .= [-1;0;1]
+
+L(x,y) = crossentropy(m(x), y)
+
+L(X_train, y_train)
+
+ps = params(m)
+grad = gradient(() -> L(X_train, y_train), ps)
+
+grad = gradient(() -> L(X_train, y_train), params(X_train))
+
+size(grad[X_train])
+
+opt = Descent(0.1)
+max_iter = 250
+
+acc_test = zeros(max_iter)
+for i in 1:max_iter
+    gs = gradient(() -> L(X_train, y_train), ps)
+    Flux.Optimise.update!(opt, ps, gs)
+    acc_test[i] = accuracy(X_test, y_test)
+end
+
+plot(acc_test, xlabel="Iteration", ylabel="Test accuracy", label="", ylim=(-0.01,1.01))
+
+# Loading data

 T = Float32
 X_train, y_train = MLDatasets.MNIST.traindata(T)
@@ -66,7 +116,7 @@ batches = DataLoader((X_train, y_train); batchsize, shuffle = true)

 # Bonus

-batches = map(partition(randperm(size(y, 2)), batchsize)) do inds
+map(partition(randperm(size(y, 2)), batchsize)) do inds
     return (X[:, :, :, inds], y[:, inds])
 end
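The one deletion here drops the assignment in the Bonus section, so the hand-built batches no longer overwrite the DataLoader bound to batches in the hunk header above. Since train_model! stores the model with BSON.bson(file_name, m=m), a minimal sketch of loading it back (the file name is illustrative):

using BSON

m_loaded = BSON.load("mnist_simple.bson")[:m]  # the model was saved under key :m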

scripts/lecture_10/utilities.jl

Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
+using Statistics
+
+function split(X, y::AbstractVector; dims=1, ratio_train=0.8, kwargs...)
+    n = length(y)
+    size(X, dims) == n || throw(DimensionMismatch("..."))
+
+    n_train = round(Int, ratio_train*n)
+    i_rand = randperm(n)
+    i_train = i_rand[1:n_train]
+    i_test = i_rand[n_train+1:end]
+
+    return selectdim(X, dims, i_train), y[i_train], selectdim(X, dims, i_test), y[i_test]
+end
+
+function normalize(X_train, X_test; dims=1, kwargs...)
+    col_mean = mean(X_train; dims)
+    col_std = std(X_train; dims)
+
+    return (X_train .- col_mean) ./ col_std, (X_test .- col_mean) ./ col_std
+end
+
+function prepare_data(X, y; do_normal=true, do_onehot=true, kwargs...)
+    X_train, y_train, X_test, y_test = split(X, y; kwargs...)
+
+    if do_normal
+        X_train, X_test = normalize(X_train, X_test; kwargs...)
+    end
+
+    classes = unique(y)
+
+    if do_onehot
+        y_train = onehot(y_train, classes)
+        y_test = onehot(y_test, classes)
+    end
+
+    return X_train, y_train, X_test, y_test, classes
+end
+
+function onehot(y, classes)
+    y_onehot = falses(length(classes), length(y))
+    for (i, class) in enumerate(classes)
+        y_onehot[i, y .== class] .= 1
+    end
+    return y_onehot
+end
+
+# onecold(y, classes) = [classes[argmax(y_col)] for y_col in eachcol(y)]
+
+accuracy(x, y) = mean(onecold(m(x), classes) .== onecold(y, classes))
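A few assumptions these helpers make: the including script must already have loaded Random (for randperm) and Flux (for onecold), defining split shadows Base.split for unqualified calls, and accuracy closes over the globals m and classes created by the lecture script. A quick round-trip sketch of the onehot helper, with illustrative values:

classes = ["setosa", "versicolor", "virginica"]
y = ["setosa", "virginica", "setosa"]
Y = onehot(y, classes)  # 3×3 BitMatrix with exactly one true per column
Y[:, 2]                 # Bool[0, 0, 1], the row for "virginica"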
