Commit 481a395

[analysis] add with resulting plots
1 parent 8178790 commit 481a395

10 files changed (+1454, -206 lines)

analysis/graph-details.ipynb

Lines changed: 176 additions & 34 deletions
@@ -35,12 +35,13 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"EVAL_DIR = \"/home/felix/todo/osm-tmp\"\n",
+"OUTPUT = \"single\"\n",
+"EVAL_DIR = \"/home/felix/todo/algohol/single\"\n",
 "MLP_METHODS = [\"kmeans\", \"gonzalez\", \"merge\"]\n",
-"MLP_LEVELS = [[int(2 ** i)] for i in np.arange(8.0, 13.5, 1.0)]\n",
+"MLP_LEVELS = [[int(2 ** i)] for i in np.arange(9.0, 12.5, 1.0)]\n",
 "FAST_QUERY_METHODS = [\"pcrp\", \"pch\", \"prp\"]\n",
-"QUERY_METHODS = [\"normal\", \"bi\"] + FAST_QUERY_METHODS\n",
-"AREAS = [\"saarland\"]\n",
+"QUERY_METHODS = [\"normal\"] + FAST_QUERY_METHODS\n",
+"AREAS = [\"baden-wuerttemberg\"]\n",
 "print(MLP_METHODS, \"with\", MLP_LEVELS)"
 ]
 },
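For reference, the updated single-level configuration expands to four partition sizes; a minimal sketch (the values follow directly from the changed lines above):

    import numpy as np

    # np.arange(9.0, 12.5, 1.0) yields 9, 10, 11, 12, so the partition counts are
    # 2**9 .. 2**12, each wrapped as a one-level list.
    MLP_LEVELS = [[int(2 ** i)] for i in np.arange(9.0, 12.5, 1.0)]
    print(MLP_LEVELS)  # [[512], [1024], [2048], [4096]]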
@@ -80,46 +81,41 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"assert((df_graph.groupby([\"amount_edges\"]).size() == 5).all())"
+"assert((df_graph.groupby([\"amount_edges\"]).size() == len(QUERY_METHODS)).all())"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "amber-cycle",
+"id": "unauthorized-contest",
 "metadata": {},
 "outputs": [],
 "source": [
-"fig, ax = plt.subplots()\n",
-"speedups = list()\n",
-"for area in AREAS:\n",
-"    dijkstra = df_graph[(df_graph.Query == \"normal\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
-"    bidijkstra = df_graph[(df_graph.Query == \"bi\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
-"    for query in FAST_QUERY_METHODS:\n",
-"        for mlp in MLP_METHODS:\n",
-"            x = list()\n",
-"            y = list()\n",
-"            for partitions in MLP_LEVELS:\n",
-"                tmp = df_graph[(df_graph.Area == area) & (df_graph.Query == query) & (df_graph.MLP_method == mlp) & (df_graph.Levels == \"_\".join(map(str, partitions)))]\n",
-"                x.append(partitions[0])\n",
-"                y.append(tmp[\"amount_used_edges\"])\n",
-"# speedups.append({\"Query\": query, \"MLP\": mlp, \"_\".join(map(str, partitions)): dijkstra / tmp[\"time\"].mean()})\n",
-"            plt.plot(x, y, marker=plot_get(query), color=plot_get(mlp), label=query + \"-\" + mlp, alpha=0.7)\n",
-"plt.xlabel(\"MLP-Partition-Size\")\n",
-"plt.ylabel(\"edges per algorithm\")\n",
-"ax.set_yscale('log')\n",
-"plt.legend(loc='upper left')\n",
-"fig.savefig(\"used-edges-single-level.pgf\", bbox_inches=\"tight\")"
+"df_graph.groupby([\"MLP_method\", \"Levels\", \"Query\"])[\"amount_used_edges\"].first()"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "unauthorized-contest",
+"id": "humanitarian-armenia",
 "metadata": {},
 "outputs": [],
 "source": [
-"df_graph.groupby([\"MLP_method\", \"Levels\", \"Query\"])[\"amount_used_edges\"].first()"
+"def format_tex(float_number):\n",
+"# exponent = np.floor(np.log10(float_number))\n",
+"    exponent = 6\n",
+"    mantissa = float_number/10**exponent\n",
+"    return \"${:0.1f}\\\\times10^{{{:}}}$\".format(float(mantissa), str(int(exponent)))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "handy-preservation",
+"metadata": {},
+"outputs": [],
+"source": [
+"df_graph[\"amount_used_edges\"] = pd.to_numeric(df_graph[\"amount_used_edges\"], downcast=\"float\")"
 ]
 },
 {
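The new format_tex helper pins the exponent at 10^6 so every table entry is reported on the same scale. A standalone sketch of its behaviour (the input value is made up for illustration):

    def format_tex(float_number):
        # exponent is fixed at 6 instead of being derived via np.floor(np.log10(...))
        exponent = 6
        mantissa = float_number / 10 ** exponent
        return "${:0.1f}\\times10^{{{:}}}$".format(float(mantissa), str(int(exponent)))

    print(format_tex(3_500_000))  # prints: $3.5\times10^{6}$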
@@ -133,16 +129,15 @@
 "for area in AREAS:\n",
 "    dijkstra = df_graph[(df_graph.Query == \"normal\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
 "    print(\"original edge amount:\", dijkstra)\n",
-"    bidijkstra = df_graph[(df_graph.Query == \"bi\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
 "    for query in FAST_QUERY_METHODS:\n",
 "        for mlp in MLP_METHODS:\n",
 "            line = dict()\n",
 "            for partitions in MLP_LEVELS:\n",
 "                tmp = df_graph[(df_graph.Area == area) & (df_graph.Query == query) & (df_graph.MLP_method == mlp) & (df_graph.Levels == \"_\".join(map(str, partitions)))]\n",
 "                line[partitions[0]] = tmp[\"amount_used_edges\"].values[0]\n",
 "            df_new = pd.DataFrame([line])\n",
-"            df_new[\"MLP_method\"] = mlp\n",
-"            df_new[\"Query\"] = query\n",
+"            df_new[\"MLP_method\"] = mlp_title(mlp)\n",
+"            df_new[\"Query\"] = query.upper()\n",
 "            df_table = pd.concat([df_table, df_new], ignore_index=True)"
 ]
 },
@@ -154,7 +149,8 @@
 "outputs": [],
 "source": [
 "df_edges = df_table.groupby([\"Query\", \"MLP_method\"]).first()\n",
-"latex = df_edges.to_latex(float_format=\"{:0.1f}\".format)\n",
+"# latex = df_edges.to_latex(float_format=\"{:0.1f}\".format)\n",
+"latex = df_edges.to_latex(float_format=format_tex, escape=False)\n",
 "df_edges"
 ]
 },
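to_latex accepts a callable as float_format, so every numeric cell is routed through format_tex, and escape=False keeps the emitted $...$ math mode intact. A hedged sketch with invented edge counts:

    import pandas as pd

    def format_tex(float_number):  # as added in the notebook cell above
        exponent = 6
        mantissa = float_number / 10 ** exponent
        return "${:0.1f}\\times10^{{{:}}}$".format(float(mantissa), str(int(exponent)))

    # hypothetical values, for illustration only
    df_demo = pd.DataFrame({"PCH": [1.2e6], "PRP": [3.4e6]})
    print(df_demo.to_latex(float_format=format_tex, escape=False))
    # each float is rendered as e.g. $1.2\times10^{6}$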
@@ -180,16 +176,162 @@
 "latex_list.insert(len(latex_list)-8, '\\midrule')\n",
 "latex_list.insert(len(latex_list)-5, '\\midrule')\n",
 "latex_new = '\\n'.join(latex_list)\n",
-"with open(\"edges.tex\", \"w\") as latex_file:\n",
+"latex_new = latex_new.replace(\"MLP_method\", \"MLP-method\")\n",
+"with open(OUTPUT + \"-edges.tex\", \"w\") as latex_file:\n",
 "    latex_file.writelines(latex_new)"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "approximate-belgium",
+"metadata": {},
+"source": [
+"# level"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "funny-tennis",
+"metadata": {},
+"outputs": [],
+"source": [
+"OUTPUT = \"level\"\n",
+"EVAL_DIR = \"/home/felix/todo/algohol/level\"\n",
+"MLP_METHODS = [\"merge\"]\n",
+"MLP_LEVELS = [[int(2 ** i)] for i in np.arange(9.0, 11.5, 1.0)]\n",
+"MLP_LEVELS = MLP_LEVELS + [[int(2 ** i), 4] for i in np.arange(9.0, 11.5, 1.0)]\n",
+"MLP_LEVELS = MLP_LEVELS + [[int(2 ** i), 16] for i in np.arange(9.0, 11.5, 1.0)]\n",
+"MLP_LEVELS = MLP_LEVELS + [[int(2 ** i), 4, 4] for i in np.arange(9.0, 11.5, 1.0)]\n",
+"FAST_QUERY_METHODS = [\"pcrp\", \"pch\", \"prp\"]\n",
+"QUERY_METHODS = [\"normal\"] + FAST_QUERY_METHODS\n",
+"AREAS = [\"baden-wuerttemberg\"]\n",
+"print(MLP_METHODS, \"with\", MLP_LEVELS)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "bottom-bottom",
+"metadata": {},
+"outputs": [],
+"source": [
+"df_graph = pd.DataFrame()\n",
+"for area in AREAS:\n",
+"    for mlp_method in MLP_METHODS:\n",
+"        for level in MLP_LEVELS:\n",
+"            for query in QUERY_METHODS:\n",
+"                df_new = pd.read_json(EVAL_DIR + \"/\" + area + \"-\" + mlp_method + \"-\" + \"_\".join(map(str, level)) + \"-\" + query + \"-info.json\", typ='series')\n",
+"                df_new = pd.DataFrame([df_new])\n",
+"                df_new[\"Area\"] = area\n",
+"                df_new[\"MLP_method\"] = mlp_method\n",
+"                df_new[\"Levels\"] = \"_\".join(map(str, level))\n",
+"                df_new[\"Query\"] = query\n",
+"                df_graph = pd.concat([df_graph, df_new], ignore_index=True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "champion-spray",
+"metadata": {},
+"outputs": [],
+"source": [
+"assert((df_graph.groupby([\"amount_edges\"]).size() == len(QUERY_METHODS)).all())"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "popular-birth",
+"metadata": {},
+"outputs": [],
+"source": [
+"df_graph[\"amount_used_edges\"] = pd.to_numeric(df_graph[\"amount_used_edges\"], downcast=\"float\")"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "accessible-courage",
 "metadata": {},
 "outputs": [],
+"source": [
+"df_table = pd.DataFrame()\n",
+"for area in AREAS:\n",
+"    dijkstra = df_graph[(df_graph.Query == \"normal\") & (df_graph.Area == area)][\"amount_used_edges\"].iloc[0]\n",
+"    print(\"original edge amount:\", dijkstra)\n",
+"    for mlp in MLP_METHODS:\n",
+"        for partitions in MLP_LEVELS:\n",
+"            line = dict()\n",
+"            for query in FAST_QUERY_METHODS:\n",
+"                tmp = df_graph[(df_graph.Area == area) & (df_graph.Query == query) & (df_graph.MLP_method == mlp) & (df_graph.Levels == \"_\".join(map(str, partitions)))]\n",
+"                line[query.upper()] = tmp[\"amount_used_edges\"].values[0]\n",
+"            df_new = pd.DataFrame([line])\n",
+"            df_new[\"partitions\"] = \"_\".join(map(str, partitions))\n",
+"            df_table = pd.concat([df_table, df_new], ignore_index=True)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "blind-saint",
+"metadata": {},
+"outputs": [],
+"source": [
+"def special_hacky_sort(x):\n",
+"    splited = x.str.split(\"-\", expand=True)\n",
+"    return pd.DataFrame(splited).astype(float).sum(axis=1)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "attached-physics",
+"metadata": {},
+"outputs": [],
+"source": [
+"df_table['partitions'] = df_table['partitions'].str.replace('_','-')\n",
+"df_edges = df_table.groupby(\"partitions\").first()\n",
+"df_edges = df_edges.sort_values(by=\"partitions\", key=special_hacky_sort)\n",
+"latex = df_edges.to_latex(float_format=format_tex, escape=False)\n",
+"df_edges"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "confidential-decade",
+"metadata": {},
+"outputs": [],
+"source": [
+"# fixup ugly latex code to have single line header\n",
+"latex_list = latex.splitlines()\n",
+"\n",
+"latex_list[0] = latex_list[0].replace('lr', 'l|r', 1)\n",
+"\n",
+"columns = latex_list[2].split(\"&\")\n",
+"indices = latex_list[3].split(\"&\")\n",
+"\n",
+"latex_list[2] = \" & \\multicolumn{\" + str(len(FAST_QUERY_METHODS)) + \"}{c}{Dijkstra-Query} \\\\\\\\\"\n",
+"\n",
+"latex_list[3] = \"&\".join(indices[:1] + columns[1:])\n",
+"\n",
+"\n",
+"latex_list.insert(len(latex_list)-10, '\\midrule')\n",
+"latex_list.insert(len(latex_list)-6, '\\midrule')\n",
+"latex_new = '\\n'.join(latex_list)\n",
+"\n",
+"with open(OUTPUT + \"-edges.tex\", \"w\") as latex_file:\n",
+"    latex_file.writelines(latex_new)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "fresh-nitrogen",
+"metadata": {},
+"outputs": [],
 "source": []
 }
 ],
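special_hacky_sort turns each "a-b-c" partition label into the sum of its components, and sort_values uses that as the sort key, so multi-level labels sort right after their base partition size. A small sketch of the resulting key values (labels taken from the level configuration above):

    import pandas as pd

    def special_hacky_sort(x):
        splited = x.str.split("-", expand=True)
        return pd.DataFrame(splited).astype(float).sum(axis=1)

    labels = pd.Series(["512", "1024", "512-4", "512-4-4"])
    print(special_hacky_sort(labels).tolist())  # [512.0, 1024.0, 516.0, 520.0]
    # sorted by this key: 512, 512-4, 512-4-4, 1024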
@@ -209,7 +351,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
-"version": "3.8.9"
+"version": "3.8.11"
 }
 },
 "nbformat": 4,

analysis/helper.py

Lines changed: 53 additions & 0 deletions
@@ -14,22 +14,43 @@
 markers = ['s', 'X', '*', '+', 'o', '^']
 
 identifiert = dict()
+mapping_colors = dict()
 
 for method, color in zip(MLP_METHODS, colors):
     identifiert[method] = color
 
 for method, marker in zip(QUERY_METHODS, markers):
     identifiert[method] = marker
 
+mapping_colors["pch"] = "kmeans"
+mapping_colors["pcrp"] = "gonzalez"
+mapping_colors["prp"] = "merge"
+
+TEXT_WIDTH = 426.0
+
 
 def ns_to_ms(value):
     return value / 1e6
 
 
+def sec_to_min(value):
+    return value / 60
+
+
 def plot_get(method):
     return identifiert[method]
 
 
+def plot_color_get(method):
+    return identifiert[mapping_colors[method]]
+
+
+def mlp_title(method):
+    if method == "kmeans":
+        return "K-means"
+    return method.title()
+
+
 def shell_execute(command, EVAL_DIR):
     start_time = time.time()
     result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
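The new helpers map each fast query method onto the colour registered for its partition method and pretty-print method names for the tables. A quick sketch of the expected outputs (assuming helper.py is importable from the working directory):

    from helper import mlp_title, plot_color_get

    print(mlp_title("kmeans"))    # K-means
    print(mlp_title("gonzalez"))  # Gonzalez
    print(mlp_title("merge"))     # Merge
    # plot_color_get("pch") returns the colour assigned to "kmeans"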
@@ -75,6 +96,38 @@ def not_created_yet(file, EVAL_DIR):
     return False
 
 
+def set_pgf_size(width_pt, fraction=1, subplots=(1, 1)):
+    """Set figure dimensions to sit nicely in our document.
+
+    Parameters
+    ----------
+    width_pt: float
+        Document width in points
+    fraction: float, optional
+        Fraction of the width which you wish the figure to occupy
+    subplots: array-like, optional
+        The number of rows and columns of subplots.
+    Returns
+    -------
+    fig_dim: tuple
+        Dimensions of figure in inches
+    """
+    # Width of figure (in pts)
+    fig_width_pt = width_pt * fraction
+    # Convert from pt to inches
+    inches_per_pt = 1 / 72.27
+
+    # Golden ratio to set aesthetic figure height
+    golden_ratio = (5**.5 - 1) / 2
+
+    # Figure width in inches
+    fig_width_in = fig_width_pt * inches_per_pt
+    # Figure height in inches
+    fig_height_in = fig_width_in * golden_ratio * (subplots[0] / subplots[1])
+
+    return (fig_width_in, fig_height_in)
+
+
 def main():
     pass
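set_pgf_size pairs with the TEXT_WIDTH constant added above to size PGF figures to the LaTeX text width, with the height set by the golden ratio. A hedged usage sketch (the output filename is made up):

    import matplotlib.pyplot as plt
    from helper import TEXT_WIDTH, set_pgf_size

    # full text width, single subplot: roughly (5.9, 3.6) inches for 426 pt
    fig, ax = plt.subplots(figsize=set_pgf_size(TEXT_WIDTH))
    fig.savefig("example.pgf", bbox_inches="tight")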
