diff --git a/02_first/supervised_pizzas.livemd b/02_first/supervised_pizzas.livemd index 9f45945..d2f05f5 100644 --- a/02_first/supervised_pizzas.livemd +++ b/02_first/supervised_pizzas.livemd @@ -4,7 +4,9 @@ Mix.install([ {:vega_lite, "~> 0.1.6"}, {:kino, "~> 0.8.1"}, - {:kino_vega_lite, "~> 0.1.7"} + {:kino_vega_lite, "~> 0.1.7"}, + {:explorer, "~> 0.5.6"}, + {:kino_explorer, "~> 0.1.4"} ]) ``` @@ -13,36 +15,26 @@ Mix.install([ ### Read the data ```elixir -file = +data = __DIR__ |> Path.join("pizza.txt") |> Path.expand() - -# Read the data from the file, remove the header and return -# `[%{reservations: integer(), pizzas: integer()}]` -data = - file |> File.read!() - |> String.split("\n", trim: true) - |> Enum.slice(1..-1) - |> Enum.map(&String.split(&1, ~r{\s+}, trim: true)) - |> Enum.map(fn [r, p] -> - %{reservations: String.to_integer(r), pizzas: String.to_integer(p)} - end) - -Kino.DataTable.new(data) + # convert any two or more spaces into a comma + |> String.replace(~r/[[:blank:]]{2,}/, ",") + |> Explorer.DataFrame.load_csv!() ``` ### Plot the data - + ```elixir VegaLite.new(width: 600, height: 400) -|> VegaLite.data_from_values(data, only: ["reservations", "pizzas"]) +|> VegaLite.data_from_values(data, only: ["Reservations", "Pizzas"]) |> VegaLite.mark(:point) -|> VegaLite.encode_field(:x, "reservations", type: :quantitative) -|> VegaLite.encode_field(:y, "pizzas", type: :quantitative) +|> VegaLite.encode_field(:x, "Reservations", type: :quantitative) +|> VegaLite.encode_field(:y, "Pizzas", type: :quantitative) ``` ## Tracing a Line @@ -99,15 +91,6 @@ end ### Train the system -```elixir -# Transform the data to unpack the 2 columns `reservations` and -# `pizzas` into separate arrays called x and y -%{x: x, y: y} = - Enum.reduce(data, %{x: [], y: []}, fn item, %{x: x, y: y} -> - %{x: x ++ [item.reservations], y: y ++ [item.pizzas]} - end) -``` - ```elixir iterations = Kino.Input.number("iterations", default: 10_000) ``` @@ -120,6 +103,10 @@ lr = 
Kino.Input.number("lr (learning rate)", default: 0.01) iterations = Kino.Input.read(iterations) lr = Kino.Input.read(lr) +# Extract "Reservations" and "Pizzas" from the dataframe +x = Explorer.Series.to_list(data["Reservations"]) +y = Explorer.Series.to_list(data["Pizzas"]) + w = C2.LinearRegression.train(x, y, iterations = iterations, lr = lr) ``` @@ -138,7 +125,7 @@ predictions = end) ``` - + ```elixir VegaLite.new(width: 600, height: 400) @@ -250,7 +237,7 @@ predictions = end) ``` - + ```elixir VegaLite.new(width: 600, height: 400) diff --git a/03_gradient/gradient_descend.livemd b/03_gradient/gradient_descend.livemd index 51048b4..09f106b 100644 --- a/03_gradient/gradient_descend.livemd +++ b/03_gradient/gradient_descend.livemd @@ -4,29 +4,23 @@ Mix.install([ {:vega_lite, "~> 0.1.6"}, {:kino, "~> 0.8.1"}, - {:kino_vega_lite, "~> 0.1.7"} + {:kino_vega_lite, "~> 0.1.7"}, + {:explorer, "~> 0.5.6"}, + {:kino_explorer, "~> 0.1.4"} ]) ``` ## Read the data ```elixir -file = +data = __DIR__ |> Path.join("pizza.txt") |> Path.expand() - -# Read the data from the file, remove the header and return -# `[%{reservations: integer(), pizzas: integer()}]` -data = - file |> File.read!() - |> String.split("\n", trim: true) - |> Enum.slice(1..-1) - |> Enum.map(&String.split(&1, ~r{\s+}, trim: true)) - |> Enum.map(fn [r, p] -> %{reservations: String.to_integer(r), pizzas: String.to_integer(p)} end) - -Kino.DataTable.new(data) + # convert any two or more spaces into a comma + |> String.replace(~r/[[:blank:]]{2,}/, ",") + |> Explorer.DataFrame.load_csv!() ``` ## Linear regression with bias @@ -88,12 +82,9 @@ end ### Plot the loss curve ```elixir -# Transform the data to unpack the 2 columns `reservations` and -# `pizzas` into separate arrays called x and y -%{x: x, y: y} = - Enum.reduce(data, %{x: [], y: []}, fn item, %{x: x, y: y} -> - %{x: x ++ [item.reservations], y: y ++ [item.pizzas]} - end) +# Extract "Reservations" and "Pizzas" from the dataframe +x = 
Explorer.Series.to_list(data["Reservations"]) +y = Explorer.Series.to_list(data["Pizzas"]) ``` ```elixir diff --git a/04_hyperspace/multiple_regression.livemd b/04_hyperspace/multiple_regression.livemd index a5e4550..fa6a064 100644 --- a/04_hyperspace/multiple_regression.livemd +++ b/04_hyperspace/multiple_regression.livemd @@ -7,7 +7,9 @@ Mix.install( {:nx, "~> 0.5"}, {:vega_lite, "~> 0.1.6"}, {:kino, "~> 0.8.1"}, - {:kino_vega_lite, "~> 0.1.7"} + {:kino_vega_lite, "~> 0.1.7"}, + {:explorer, "~> 0.5.6"}, + {:kino_explorer, "~> 0.1.4"} ], config: [nx: [default_backend: EXLA.Backend]] ) @@ -18,43 +20,23 @@ Mix.install( ### Preparing Data ```elixir -file = +data = __DIR__ |> Path.join("pizza_3_vars.txt") |> Path.expand() + |> File.read!() + # convert any two or more spaces into a comma + |> String.replace(~r/[[:blank:]]{2,}/, ",") + |> Explorer.DataFrame.load_csv!() +``` -# Read the data from the file, remove the header and return -# `[%{reservations: integer(), temperature: integer(), tourists: integer(), pizzas: integer()}]` -data = - File.read!(file) - |> String.split("\n", trim: true) - |> Enum.slice(1..-1) - |> Enum.map(&String.split(&1, ~r{\s+}, trim: true)) - |> Enum.map(fn [r, temp, tour, p] -> - %{ - reservations: String.to_integer(r), - temperature: String.to_integer(temp), - tourists: String.to_integer(tour), - pizzas: String.to_integer(p) - } - end) - -Kino.DataTable.new(data, keys: [:reservations, :temperature, :tourists, :pizzas]) -``` - -```elixir -# Transform the data to unpack the 4 columns `reservations`, -# `temperature`, `tourists` and `pizzas` into separate arrays -# called x1, x2, x3 and y -%{x1: x1, x2: x2, x3: x3, y: y} = - Enum.reduce(data, %{x1: [], x2: [], x3: [], y: []}, fn item, %{x1: x1, x2: x2, x3: x3, y: y} -> - %{ - x1: x1 ++ [item.reservations], - x2: x2 ++ [item.temperature], - x3: x3 ++ [item.tourists], - y: y ++ [item.pizzas] - } - end) +```elixir +# Extract "Reservations", "Temperature", "Tourists" and "Pizzas" +# respectively as 
x1, x2, x3, y from the dataframe +x1 = Explorer.Series.to_list(data["Reservations"]) +x2 = Explorer.Series.to_list(data["Temperature"]) +x3 = Explorer.Series.to_list(data["Tourists"]) +y = Explorer.Series.to_list(data["Pizzas"]) ``` ### Let's build the matrix x for input variables diff --git a/05_discerning/classifier.livemd b/05_discerning/classifier.livemd index 746e6a4..d8f90a2 100644 --- a/05_discerning/classifier.livemd +++ b/05_discerning/classifier.livemd @@ -7,7 +7,9 @@ Mix.install( {:nx, "~> 0.5"}, {:vega_lite, "~> 0.1.6"}, {:kino, "~> 0.8.1"}, - {:kino_vega_lite, "~> 0.1.7"} + {:kino_vega_lite, "~> 0.1.7"}, + {:explorer, "~> 0.5.6"}, + {:kino_explorer, "~> 0.1.4"} ], config: [nx: [default_backend: EXLA.Backend]] ) @@ -161,58 +163,35 @@ end ## Read the data ```elixir -file = +data = __DIR__ |> Path.join("police.txt") |> Path.expand() - -# Read the data from the file, remove the header and return -# `[%{reservations: integer(), temperature: integer(), tourists: integer(), police: integer()}]` -data = - File.read!(file) - |> String.split("\n", trim: true) - |> Enum.slice(1..-1) - |> Enum.map(&String.split(&1, ~r{\s+}, trim: true)) - |> Enum.map(fn [r, temp, tour, p] -> - %{ - reservations: String.to_integer(r), - temperature: String.to_integer(temp), - tourists: String.to_integer(tour), - police: String.to_integer(p) - } - end) - -Kino.DataTable.new(data, keys: [:reservations, :temperature, :tourists, :police]) + |> File.read!() + # convert any two or more spaces into a comma + |> String.replace(~r/[[:blank:]]{2,}/, ",") + |> Explorer.DataFrame.load_csv!() ``` ### Prepare the data ```elixir -# Transform the data to unpack the 4 columns `reservations`, -# `temperature`, `tourists` and `police` into separate arrays -# called x1, x2, x3 and y -%{x1: x1, x2: x2, x3: x3, y: y} = - Enum.reduce(data, %{x1: [], x2: [], x3: [], y: []}, fn item, %{x1: x1, x2: x2, x3: x3, y: y} -> - %{ - x1: x1 ++ [item.reservations], - x2: x2 ++ [item.temperature], - x3: x3 ++ 
[item.tourists], - y: y ++ [item.police] - } - end) -``` +inputs = + data + |> Explorer.DataFrame.select(["Reservations", "Temperature", "Tourists"]) + |> Nx.stack(axis: 1) -```elixir -# bias -x0 = List.duplicate(1, length(x1)) +bias = Nx.broadcast(1, {elem(Nx.shape(inputs), 0), 1}) -x = - [x0, x1, x2, x3] - |> Nx.tensor() - |> Nx.transpose() +# append the bias to the inputs +x = Nx.concatenate([bias, inputs], axis: 1) -# Same of `y.reshape(-1, 1)` used in the book -y = Nx.tensor(y) |> Nx.reshape({:auto, 1}) +y = + data["Police"] + |> Explorer.Series.to_list() + |> Nx.tensor() + # Same as `y.reshape(-1, 1)` used in the book + |> Nx.reshape({:auto, 1}) ``` ### Our new model @@ -222,18 +201,22 @@ Plot of the `forward()` function. ```elixir alias VegaLite, as: Vl -reservations_tensor = Nx.tensor([x0, x1]) |> Nx.transpose() +# bias and reservations +reservations_tensor_with_bias = x[[.., 0..1]] # It can take a bit of time -weight = C5.Classifier.train(reservations_tensor, y, iterations = 1_000_000, lr = 0.01) +weight = C5.Classifier.train(reservations_tensor_with_bias, y, iterations = 1_000_000, lr = 0.01) -predictions = C5.Classifier.forward(reservations_tensor, weight) -rounded_predictions = C5.Classifier.classify(reservations_tensor, weight) +predictions = C5.Classifier.forward(reservations_tensor_with_bias, weight) +rounded_predictions = C5.Classifier.classify(reservations_tensor_with_bias, weight) :ok ``` ```elixir +# extract the reservations from the tensor +x1 = x[[.., 1]] |> Nx.to_list() + Vl.new(width: 600, height: 400, title: "Model - forward()") |> Vl.layers([ Vl.new()