Skip to content

Commit 180aa1b

Browse files
committed
generate datasets
1 parent 0d7b1a2 commit 180aa1b

File tree

4 files changed

+102
-15
lines changed

4 files changed

+102
-15
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Data Space
22

3-
**Machine learning in data space web demo.**
3+
**Machine learning in data space web demo. Go to [dataspace.bengfort.com](http://dataspace.bengfort.com) for the live version.**
44

55
In the tradition of Tkinter SVM GUI, the purpose of this app is to demonstrate how machine learning model forms are affected by the shape of the underlying dataset. By selecting a dataset or by creating one of your own, you can fit a model to the data and see how the model would make decisions based on the data it has been trained on. Although this is a toy example, hopefully it helps give you the intuition that the machine learning process is a model selection search for the best combination of features, algorithm, and hyperparameters that generalizes well in a bounded feature space.
66

app.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,22 @@
1919
##########################################################################
2020

2121
from flask import Flask
22-
from flask import render_template
22+
from flask import render_template, jsonify, request
23+
24+
from functools import partial
25+
from sklearn.preprocessing import MinMaxScaler
26+
from sklearn.datasets import make_blobs, make_circles, make_moons, make_classification
27+
28+
29+
##########################################################################
30+
## Data Generators
31+
##########################################################################
32+
33+
# Pre-bound dataset generators. Each name below wraps a scikit-learn generator
# in functools.partial so that calling it with no arguments returns an (X, y)
# pair. Note that make_moons, make_blobs and make_circles rebind (shadow) the
# names imported from sklearn.datasets.
_N_SAMPLES = 256  # every generator is bound to the same number of samples
_NOISE = 0.075    # noise value shared by the moons and circles generators

make_moons = partial(make_moons, noise=_NOISE, n_samples=_N_SAMPLES)
make_blobs = partial(make_blobs, centers=2, n_features=2, n_samples=_N_SAMPLES)
make_circles = partial(
    make_circles, factor=0.5, noise=_NOISE, n_samples=_N_SAMPLES
)
make_binary = partial(
    make_classification,
    n_samples=_N_SAMPLES,
    n_features=2,
    n_redundant=0,
    n_classes=2,
)
make_multiclass = partial(
    make_classification,
    n_samples=_N_SAMPLES,
    n_features=2,
    n_redundant=0,
    n_clusters_per_class=1,
    n_classes=4,
)
2338

2439

2540
##########################################################################
@@ -39,6 +54,27 @@ def index():
3954
return render_template('index.html', title='Home')
4055

4156

57+
@app.route("/generate", methods=["POST"])
def generate():
    """
    Generate a synthetic dataset and return it as a JSON list of points.

    The POST body must be a JSON object. Its optional "generator" key names
    the data generator to use ("binary" when the key is absent); valid names
    are "binary", "multiclass", "blobs", "circles" and "moons".

    Returns a JSON array of {"x": float, "y": float, "c": int} objects, where
    x and y are the two features after MinMaxScaler rescaling and c is the
    class label. Responds with HTTP 400 and a JSON {"error": ...} body when
    the request body is not a JSON object or the generator name is unknown.
    """
    # silent=True returns None instead of raising when the body is missing,
    # has the wrong content type, or is not parseable JSON, so every bad
    # request can be answered with the same JSON-formatted 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify(error="request body must be a JSON object"), 400

    generators = {
        'binary': make_binary,
        'multiclass': make_multiclass,
        'blobs': make_blobs,
        'circles': make_circles,
        'moons': make_moons,
    }

    # The isinstance check also guards against unhashable JSON values
    # (lists, objects) that would otherwise raise on the dictionary lookup.
    name = data.get("generator", "binary")
    if not isinstance(name, str) or name not in generators:
        return jsonify(error="unknown generator: {!r}".format(name)), 400

    X, y = generators[name]()
    X = MinMaxScaler().fit_transform(X)

    # Convert numpy scalars to plain Python numbers so they serialize to JSON.
    points = [
        {"x": float(row[0]), "y": float(row[1]), "c": int(label)}
        for row, label in zip(X, y)
    ]
    return jsonify(points)
76+
77+
4278
##########################################################################
4379
## Run the Web App
4480
##########################################################################

static/js/dataspace.js

Lines changed: 49 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,29 +14,27 @@ class Dataspace {
1414
// drawing properties are hardcoded for now
1515
this.width = this.$svg.width();
1616
this.height = this.$svg.height();
17-
this.color = d3.scale.category10();
17+
this.color = d3.scaleOrdinal(d3.schemeCategory10);
1818

19-
this.xScale = d3.scale.linear()
19+
this.xScale = d3.scaleLinear()
2020
.domain([0, 1])
2121
.range([margin.left, this.width - margin.right]);
2222

23-
this.yScale = d3.scale.linear()
23+
this.yScale = d3.scaleLinear()
2424
.domain([0, 1])
2525
.range([margin.top, this.height - margin.bottom])
2626
}
2727

2828
draw() {
2929
var self = this;
30-
this.svg.selectAll("circle")
31-
.data(this.dataset)
30+
self.svg.selectAll("circle")
31+
.data(self.dataset)
3232
.enter()
3333
.append("circle")
34-
.attr({
35-
cx: function (d) { return self.xScale(d.x); },
36-
cy: function (d) { return self.yScale(d.y); },
37-
fill: function (d) { return self.color(d.c); },
38-
r: radius
39-
});
34+
.attr('cx', function (d) { console.log(d); return self.xScale(d.x); })
35+
.attr('cy', function (d) { return self.yScale(d.y); })
36+
.attr('fill', function (d) { return self.color(d.c); })
37+
.attr('r', radius);
4038
}
4139

4240
// Add raw data point (e.g. where x and y are between 0 and 1)
@@ -55,6 +53,27 @@ class Dataspace {
5553
this.addPoint(point);
5654
}
5755

56+
// Fetch dataset and add it to plot
57+
fetch(data) {
58+
this.reset();
59+
d3.json("/generate", {
60+
method: "POST",
61+
body: JSON.stringify(data),
62+
headers: {
63+
"Content-Type": "application/json; charset=UTF-8"
64+
}
65+
}).then(json => {
66+
this.dataset = json;
67+
this.draw();
68+
});
69+
}
70+
71+
// Reset the plotting area
72+
reset() {
73+
this.dataset = [];
74+
this.svg.selectAll("circle").remove();
75+
}
76+
5877
}
5978

6079
$(document).ready(function() {
@@ -74,4 +93,23 @@ $(document).ready(function() {
7493
return false;
7594
})
7695

96+
// Clear the dataset and the points currently drawn
97+
$("button#resetBtn").on("click", function () {
  app.reset();
  // Returning false from a jQuery handler prevents the default action and
  // stops the event from propagating.
  return false;
});
101+
102+
// Handle the dataset generator form
103+
$("form#datasetForm").submit(function(e) {
  e.preventDefault();

  // Collapse the form's [{name, value}, ...] pairs into a plain object;
  // when a name repeats, the last value wins.
  var fields = $(e.target).serializeArray();
  var payload = {};
  fields.forEach(function (field) {
    payload[field.name] = field.value;
  });

  app.fetch(payload);
  return false;
});
114+
77115
});

templates/index.html

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,22 @@
6666
<option value="2">2</option>
6767
<option value="3">3</option>
6868
</select>
69+
<button class="btn btn-primary" id="resetBtn">Reset</button>
6970
</form>
7071
</div>
7172
<div class="col-md-6">
72-
73+
<form class="form-inline pull-right" id="datasetForm">
74+
<label class="my-1 mr-2" for="generator">Generate Dataset</label>
75+
<select class="custom-select my-1 mr-sm-2" name="generator">
76+
<option selected>&hellip;</option>
77+
<option value="moons">Moons</option>
78+
<option value="circles">Circles</option>
79+
<option value="blobs">Blobs</option>
80+
<option value="binary">Binary</option>
81+
<option value="multiclass">Multiclass</option>
82+
</select>
83+
<button type="submit" class="btn btn-primary" id="createBtn">Generate</button>
84+
</form>
7385
</div>
7486
</div><!-- controls ends -->
7587
</main>
@@ -120,7 +132,8 @@ <h5 class="modal-title" id="aboutModalLabel">About Data Space</h5>
120132
integrity="sha256-G7A4JrJjJlFqP0yamznwPjAApIKPkadeHfyIwiaa9e0=" crossorigin="anonymous"></script>
121133
<script src="https://stackpath.bootstrapcdn.com/bootstrap/4.1.0/js/bootstrap.min.js"
122134
integrity="sha384-uefMccjFJAIv6A+rW+L4AHf99KvxDjWSu1z9VI8SKNVmz4sk7buKt/6v9KI65qnm" crossorigin="anonymous"></script>
123-
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.12/d3.min.js" charset="utf-8"></script>
135+
<script src="https://d3js.org/d3.v5.min.js"></script>
136+
<script src="https://unpkg.com/d3-fetch"></script>
124137
<script src="{{ url_for('static', filename='js/dataspace.js') }}"></script>
125138

126139
<!-- google analytics -->

0 commit comments

Comments
 (0)