nickgnd · nickgnd · Jun 1, 2023 · Jun 2, 2023 · Jun 2, 2023 · Jun 2, 2023
diff --git a/02_first/supervised_pizzas.livemd b/02_first/supervised_pizzas.livemd
@@ -4,7 +4,9 @@
 Mix.install([
   {:vega_lite, "~> 0.1.6"},
   {:kino, "~> 0.8.1"},
-  {:kino_vega_lite, "~> 0.1.7"}
+  {:kino_vega_lite, "~> 0.1.7"},
+  {:explorer, "~> 0.5.6"},
+  {:kino_explorer, "~> 0.1.4"}
 ])
 ```
 
@@ -13,36 +15,26 @@ Mix.install([
 ### Read the data
 
 ```elixir
-file =
+data =
   __DIR__
   |> Path.join("pizza.txt")
   |> Path.expand()
-
-# Read the data from the file, remove the header and return
-# `[%{reservations: integer(), pizzas: integer()}]`
-data =
-  file
   |> File.read!()
-  |> String.split("\n", trim: true)
-  |> Enum.slice(1..-1)
-  |> Enum.map(&String.split(&1, ~r{\s+}, trim: true))
-  |> Enum.map(fn [r, p] ->
-    %{reservations: String.to_integer(r), pizzas: String.to_integer(p)}
-  end)
-
-Kino.DataTable.new(data)
+  # convert any run of two or more blanks (spaces or tabs) into a comma
+  |> String.replace(~r/[[:blank:]]{2,}/, ",")
+  |> Explorer.DataFrame.load_csv!()
 ```
 
 ### Plot the data
 
-<!-- livebook:{"attrs":{"chart_title":null,"height":400,"layers":[{"chart_type":"point","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"data","x_field":"reservations","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"pizzas","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"}],"vl_alias":"Elixir.VegaLite","width":600},"chunks":null,"kind":"Elixir.KinoVegaLite.ChartCell","livebook_object":"smart_cell"} -->
+<!-- livebook:{"attrs":{"chart_title":null,"height":400,"layers":[{"active":true,"chart_type":"point","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"data","geodata_color":"blue","latitude_field":null,"longitude_field":null,"x_field":"Reservations","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"Pizzas","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"}],"vl_alias":"Elixir.VegaLite","width":600},"chunks":null,"kind":"Elixir.KinoVegaLite.ChartCell","livebook_object":"smart_cell"} -->
 
 ```elixir
 VegaLite.new(width: 600, height: 400)
-|> VegaLite.data_from_values(data, only: ["reservations", "pizzas"])
+|> VegaLite.data_from_values(data, only: ["Reservations", "Pizzas"])
 |> VegaLite.mark(:point)
-|> VegaLite.encode_field(:x, "reservations", type: :quantitative)
-|> VegaLite.encode_field(:y, "pizzas", type: :quantitative)
+|> VegaLite.encode_field(:x, "Reservations", type: :quantitative)
+|> VegaLite.encode_field(:y, "Pizzas", type: :quantitative)
 ```
 
 ## Tracing a Line
@@ -99,15 +91,6 @@ end
 
 ### Train the system
 
-```elixir
-# Transform the data to unpack the 2 columns `reservations` and
-# `pizzas` into separate arrays called x and y
-%{x: x, y: y} =
-  Enum.reduce(data, %{x: [], y: []}, fn item, %{x: x, y: y} ->
-    %{x: x ++ [item.reservations], y: y ++ [item.pizzas]}
-  end)
-```
-
 ```elixir
 iterations = Kino.Input.number("iterations", default: 10_000)
 ```
@@ -120,6 +103,10 @@ lr = Kino.Input.number("lr (learning rate)", default: 0.01)
 iterations = Kino.Input.read(iterations)
 lr = Kino.Input.read(lr)
 
+# Extract "Reservations" and "Pizzas" from the dataframe
+x = Explorer.Series.to_list(data["Reservations"])
+y = Explorer.Series.to_list(data["Pizzas"])
+
 w = C2.LinearRegression.train(x, y, iterations = iterations, lr = lr)
 ```
 
@@ -138,7 +125,7 @@ predictions =
   end)
 ```
 
-<!-- livebook:{"attrs":{"chart_title":null,"height":400,"layers":[{"chart_type":"point","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"data","x_field":"reservations","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"pizzas","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"},{"chart_type":"line","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"predictions","x_field":"x","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"prediction","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"}],"vl_alias":"Elixir.VegaLite","width":600},"chunks":null,"kind":"Elixir.KinoVegaLite.ChartCell","livebook_object":"smart_cell"} -->
+<!-- livebook:{"attrs":{"chart_title":null,"height":400,"layers":[{"active":true,"chart_type":"point","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"data","geodata_color":"blue","latitude_field":null,"longitude_field":null,"x_field":"Reservations","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"Pizzas","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"},{"active":true,"chart_type":"line","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"predictions","geodata_color":"blue","latitude_field":null,"longitude_field":null,"x_field":"x","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"prediction","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"}],"vl_alias":"Elixir.VegaLite","width":600},"chunks":null,"kind":"Elixir.KinoVegaLite.ChartCell","livebook_object":"smart_cell"} -->
 
 ```elixir
 VegaLite.new(width: 600, height: 400)
@@ -250,7 +237,7 @@ predictions =
   end)
 ```
 
-<!-- livebook:{"attrs":{"chart_title":null,"height":400,"layers":[{"chart_type":"point","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"data","x_field":"reservations","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"pizzas","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"},{"chart_type":"line","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"predictions","x_field":"x","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"prediction","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"}],"vl_alias":"Elixir.VegaLite","width":600},"chunks":null,"kind":"Elixir.KinoVegaLite.ChartCell","livebook_object":"smart_cell"} -->
+<!-- livebook:{"attrs":{"chart_title":null,"height":400,"layers":[{"active":true,"chart_type":"point","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"data","geodata_color":"blue","latitude_field":null,"longitude_field":null,"x_field":"Reservations","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"Pizzas","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"},{"active":true,"chart_type":"line","color_field":null,"color_field_aggregate":null,"color_field_bin":false,"color_field_scale_scheme":null,"color_field_type":null,"data_variable":"predictions","geodata_color":"blue","latitude_field":null,"longitude_field":null,"x_field":"x","x_field_aggregate":null,"x_field_bin":false,"x_field_scale_type":null,"x_field_type":"quantitative","y_field":"prediction","y_field_aggregate":null,"y_field_bin":false,"y_field_scale_type":null,"y_field_type":"quantitative"}],"vl_alias":"Elixir.VegaLite","width":600},"chunks":null,"kind":"Elixir.KinoVegaLite.ChartCell","livebook_object":"smart_cell"} -->
 
 ```elixir
 VegaLite.new(width: 600, height: 400)

diff --git a/03_gradient/gradient_descend.livemd b/03_gradient/gradient_descend.livemd
@@ -4,29 +4,23 @@
 Mix.install([
   {:vega_lite, "~> 0.1.6"},
   {:kino, "~> 0.8.1"},
-  {:kino_vega_lite, "~> 0.1.7"}
+  {:kino_vega_lite, "~> 0.1.7"},
+  {:explorer, "~> 0.5.6"},
+  {:kino_explorer, "~> 0.1.4"}
 ])
 ```
 
 ## Read the data
 
 ```elixir
-file =
+data =
   __DIR__
   |> Path.join("pizza.txt")
   |> Path.expand()
-
-# Read the data from the file, remove the header and return
-# `[%{reservations: integer(), pizzas: integer()}]`
-data =
-  file
   |> File.read!()
-  |> String.split("\n", trim: true)
-  |> Enum.slice(1..-1)
-  |> Enum.map(&String.split(&1, ~r{\s+}, trim: true))
-  |> Enum.map(fn [r, p] -> %{reservations: String.to_integer(r), pizzas: String.to_integer(p)} end)
-
-Kino.DataTable.new(data)
+  # convert any run of two or more blanks (spaces or tabs) into a comma
+  |> String.replace(~r/[[:blank:]]{2,}/, ",")
+  |> Explorer.DataFrame.load_csv!()
 ```
 
 ## Linear regression with bias
@@ -88,12 +82,9 @@ end
 ### Plot the loss curve
 
 ```elixir
-# Transform the data to unpack the 2 columns `reservations` and
-# `pizzas` into separate arrays called x and y
-%{x: x, y: y} =
-  Enum.reduce(data, %{x: [], y: []}, fn item, %{x: x, y: y} ->
-    %{x: x ++ [item.reservations], y: y ++ [item.pizzas]}
-  end)
+# Extract "Reservations" and "Pizzas" from the dataframe
+x = Explorer.Series.to_list(data["Reservations"])
+y = Explorer.Series.to_list(data["Pizzas"])
 ```
 
 ```elixir

diff --git a/04_hyperspace/multiple_regression.livemd b/04_hyperspace/multiple_regression.livemd
@@ -7,7 +7,9 @@ Mix.install(
     {:nx, "~> 0.5"},
     {:vega_lite, "~> 0.1.6"},
     {:kino, "~> 0.8.1"},
-    {:kino_vega_lite, "~> 0.1.7"}
+    {:kino_vega_lite, "~> 0.1.7"},
+    {:explorer, "~> 0.5.6"},
+    {:kino_explorer, "~> 0.1.4"}
   ],
   config: [nx: [default_backend: EXLA.Backend]]
 )
@@ -18,43 +20,23 @@ Mix.install(
 ### Preparing Data
 
 ```elixir
-file =
+data =
   __DIR__
   |> Path.join("pizza_3_vars.txt")
   |> Path.expand()
+  |> File.read!()
+  # convert any run of two or more blanks (spaces or tabs) into a comma
+  |> String.replace(~r/[[:blank:]]{2,}/, ",")
+  |> Explorer.DataFrame.load_csv!()
+```
 
-# Read the data from the file, remove the header and return
-# `[%{reservations: integer(), temperature: integer(), tourists: integer(), pizzas: integer()}]`
-data =
-  File.read!(file)
-  |> String.split("\n", trim: true)
-  |> Enum.slice(1..-1)
-  |> Enum.map(&String.split(&1, ~r{\s+}, trim: true))
-  |> Enum.map(fn [r, temp, tour, p] ->
-    %{
-      reservations: String.to_integer(r),
-      temperature: String.to_integer(temp),
-      tourists: String.to_integer(tour),
-      pizzas: String.to_integer(p)
-    }
-  end)
-
-Kino.DataTable.new(data, keys: [:reservations, :temperature, :tourists, :pizzas])
-```
-
-```elixir
-# Transform the data to unpack the 4 columns `reservations`,
-# `temperature`, `tourists` and `pizzas` into separate arrays
-# called x1, x2, x3 and y
-%{x1: x1, x2: x2, x3: x3, y: y} =
-  Enum.reduce(data, %{x1: [], x2: [], x3: [], y: []}, fn item, %{x1: x1, x2: x2, x3: x3, y: y} ->
-    %{
-      x1: x1 ++ [item.reservations],
-      x2: x2 ++ [item.temperature],
-      x3: x3 ++ [item.tourists],
-      y: y ++ [item.pizzas]
-    }
-  end)
+```elixir
+# Extract "Reservations", "Temperature", "Tourists" and "Pizzas"
+# respectively as x1, x2, x3, y from the dataframe
+x1 = Explorer.Series.to_list(data["Reservations"])
+x2 = Explorer.Series.to_list(data["Temperature"])
+x3 = Explorer.Series.to_list(data["Tourists"])
+y = Explorer.Series.to_list(data["Pizzas"])
 ```
 
 ### Let's build the matrix x for input variables

diff --git a/05_discerning/classifier.livemd b/05_discerning/classifier.livemd
@@ -7,7 +7,9 @@ Mix.install(
     {:nx, "~> 0.5"},
     {:vega_lite, "~> 0.1.6"},
     {:kino, "~> 0.8.1"},
-    {:kino_vega_lite, "~> 0.1.7"}
+    {:kino_vega_lite, "~> 0.1.7"},
+    {:explorer, "~> 0.5.6"},
+    {:kino_explorer, "~> 0.1.4"}
   ],
   config: [nx: [default_backend: EXLA.Backend]]
 )
@@ -161,58 +163,35 @@ end
 ## Read the data
 
 ```elixir
-file =
+data =
   __DIR__
   |> Path.join("police.txt")
   |> Path.expand()
-
-# Read the data from the file, remove the header and return
-# `[%{reservations: integer(), temperature: integer(), tourists: integer(), police: integer()}]`
-data =
-  File.read!(file)
-  |> String.split("\n", trim: true)
-  |> Enum.slice(1..-1)
-  |> Enum.map(&String.split(&1, ~r{\s+}, trim: true))
-  |> Enum.map(fn [r, temp, tour, p] ->
-    %{
-      reservations: String.to_integer(r),
-      temperature: String.to_integer(temp),
-      tourists: String.to_integer(tour),
-      police: String.to_integer(p)
-    }
-  end)
-
-Kino.DataTable.new(data, keys: [:reservations, :temperature, :tourists, :police])
+  |> File.read!()
+  # convert any run of two or more blanks (spaces or tabs) into a comma
+  |> String.replace(~r/[[:blank:]]{2,}/, ",")
+  |> Explorer.DataFrame.load_csv!()
 ```
 
 ### Prepare the data
 
 ```elixir
-# Transform the data to unpack the 4 columns `reservations`,
-# `temperature`, `tourists` and `police` into separate arrays
-# called x1, x2, x3 and y
-%{x1: x1, x2: x2, x3: x3, y: y} =
-  Enum.reduce(data, %{x1: [], x2: [], x3: [], y: []}, fn item, %{x1: x1, x2: x2, x3: x3, y: y} ->
-    %{
-      x1: x1 ++ [item.reservations],
-      x2: x2 ++ [item.temperature],
-      x3: x3 ++ [item.tourists],
-      y: y ++ [item.police]
-    }
-  end)
-```
+inputs =
+  data
+  |> Explorer.DataFrame.select(["Reservations", "Temperature", "Tourists"])
+  |> Nx.stack(axis: 1)
 
-```elixir
-# bias
-x0 = List.duplicate(1, length(x1))
+bias = Nx.broadcast(1, {elem(Nx.shape(inputs), 0), 1})
 
-x =
-  [x0, x1, x2, x3]
-  |> Nx.tensor()
-  |> Nx.transpose()
+# append the bias to the inputs
+x = Nx.concatenate([bias, inputs], axis: 1)
 
-# Same of `y.reshape(-1, 1)` used in the book
-y = Nx.tensor(y) |> Nx.reshape({:auto, 1})
+y =
+  data["Police"]
+  |> Explorer.Series.to_list()
+  |> Nx.tensor()
+  # Same as `y.reshape(-1, 1)` used in the book
+  |> Nx.reshape({:auto, 1})
 ```
 
 ### Our new model
@@ -222,18 +201,22 @@ Plot of the `forward()` function.
 ```elixir
 alias VegaLite, as: Vl
 
-reservations_tensor = Nx.tensor([x0, x1]) |> Nx.transpose()
+# bias and reservations
+reservations_tensor_with_bias = x[[.., 0..1]]
 
 # It can take a bit of time
-weight = C5.Classifier.train(reservations_tensor, y, iterations = 1_000_000, lr = 0.01)
+weight = C5.Classifier.train(reservations_tensor_with_bias, y, iterations = 1_000_000, lr = 0.01)
 
-predictions = C5.Classifier.forward(reservations_tensor, weight)
-rounded_predictions = C5.Classifier.classify(reservations_tensor, weight)
+predictions = C5.Classifier.forward(reservations_tensor_with_bias, weight)
+rounded_predictions = C5.Classifier.classify(reservations_tensor_with_bias, weight)
 
 :ok
 ```
 
 ```elixir
+# extract the reservations from the tensor
+x1 = x[[.., 1]] |> Nx.to_list()
+
 Vl.new(width: 600, height: 400, title: "Model - forward()")
 |> Vl.layers([
   Vl.new()