
Commit 940fc9a

Commit message: analysis scripts
1 parent 9ef93d4 · commit 940fc9a

16 files changed, +2303 -0 lines
Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
import pandas as pd
from pathlib import Path
from utils import move_tag_to_new_column

RESULTDIR = "analysis_results"
DATADIR = "data"
root = Path(__file__).parents[1]  # 0524
data = root / DATADIR
analysis = Path(__file__).parent  # 0830
resultDir = analysis / RESULTDIR
# Expected repo layout:
# .
# ├── README.md
# ├── [d] analysis
# │   ├── [d] analysis_results
# │   │   ├── analysis_new.csv
# │   │   └── analysis.csv
# │   ├── [d] images
# │   ├── merge_0913.py
# │   ├── plot_0810_dotprod_extend.py
# ├── [d] data
# │   ├── [d] cluster_hhpai_na61-pod_hhpai_na61-0820_no_time
# │   │   ├── [d] 01-FragShare
# │   │   │   ├── [d] 0.6
# │   │   │   │   ├── [d] 42
# ......

def exit_and_save_to_csv(dflist):
    dfo = pd.concat(dflist)
    resultDir.mkdir(exist_ok=True)
    csvfile = resultDir / 'analysis_allo_discrete.csv'
    dfo.to_csv(csvfile)
    print("%d rows saved to: %s" % (len(dfo), csvfile))
    exit()

# Walk data/<workload>/<sc_policy>/<tune>/<seed>/ and merge every analysis_allo.csv.
fileDirs = sorted([x for x in data.iterdir() if x.is_dir()])
dflist = []
for fdir in fileDirs:
    policyDirs = sorted([x for x in fdir.iterdir() if x.is_dir()])
    for pdir in policyDirs:
        tuneDirs = sorted([x for x in pdir.iterdir() if x.is_dir()])
        for tdir in tuneDirs:
            seedDirs = sorted([x for x in tdir.iterdir() if x.is_dir()])
            for sdir in seedDirs:
                afile = fdir / pdir / tdir / sdir / 'analysis_allo.csv'
                print(afile)
                if not afile.is_file():
                    continue
                try:
                    df = pd.read_csv(afile)
                    df.columns = [x.split('-')[-1] for x in df.columns]
                    dfd = df.to_dict(orient="list")

                    # Express arrived/used GPU-milli as a percentage of cluster capacity
                    # (total_gpu_num * 1000 milli), then discretize the arrival ratio.
                    total_gpu_num = df.total_gpus.values[0]
                    df['arrive_ratio'] = df.arrived_gpu_milli / total_gpu_num / 10
                    df['arrive_ratio'] = df['arrive_ratio'].apply(lambda x: round(x, 0))
                    df['alloc_ratio'] = df.used_gpu_milli / total_gpu_num / 10
                    df['alloc_ratio'] = df['alloc_ratio'].apply(lambda x: round(x, 2))

                    dfn = dict()
                    # for k, v in dfd.items():
                    #     dfn[k] = "-".join(str(x) for x in v)
                    dfn["workload"] = fdir.name
                    dfn["sc_policy"] = pdir.name
                    dfn['tune'] = tdir.name
                    dfn['seed'] = sdir.name
                    dfn['total_gpus'] = total_gpu_num
                    # One column per discrete arrival ratio (0%..130%): mean allocation ratio.
                    for arrr in range(0, 131, 1):
                        dfv = df[df.arrive_ratio == arrr]
                        if len(dfv) == 0:
                            # Fall back to a +/-1 window around the missing arrival ratio.
                            dfv = df[(df.arrive_ratio >= arrr - 1) & (df.arrive_ratio <= arrr + 1)]
                            if len(dfv) == 0:
                                # print("No data:", arrr)
                                continue
                        val = round(dfv.alloc_ratio.mean(), 2)
                        dfn[arrr] = val

                    dfo = pd.DataFrame(dfn, index=[len(dflist)]).set_index(["workload", "sc_policy", "tune", "seed"])
                    dflist.append(dfo)

                    # if len(dflist) > 1:
                    #     exit_and_save_to_csv(dflist)

                except Exception as e:
                    exit("ERROR file: %s\n%s" % (afile, e))

exit_and_save_to_csv(dflist)
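
For orientation, here is a minimal downstream sketch of how the resulting analysis_results/analysis_allo_discrete.csv could be consumed, assuming the index layout written above (workload, sc_policy, tune, seed) and one column per discretized arrival ratio; the seed-averaging step is illustrative and not part of this commit.

import pandas as pd

# Hypothetical consumer of the CSV produced by the script above.
df = pd.read_csv("analysis_results/analysis_allo_discrete.csv",
                 index_col=["workload", "sc_policy", "tune", "seed"])

# Columns "0".."130" hold the mean allocation ratio at each discrete arrival ratio.
ratio_cols = [c for c in df.columns if c.isdigit()]

# Average over random seeds to get one allocation curve per (workload, policy, tune).
curves = df[ratio_cols].groupby(level=["workload", "sc_policy", "tune"]).mean()
print(curves.head())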

experiments/analysis/merge_bash.sh

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
#!/bin/bash
DATE="2023_0511"
TRACE="openb_pod_list_default"
ln -s "../${DATE}/${TRACE}" ../data/

# Run the four merge scripts in parallel, each in the background.
python3 merge_alloc_discrete_0920.py &
python3 merge_frag_discrete_0928.py &
python3 merge_frag_ratio_discrete_0928.py &
python3 merge_fail_pods_1010.py &
Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
import pandas as pd
from pathlib import Path
from utils import get_total_num_gpu

DATADIR = "data"
IN_FILE = "analysis_fail.out"
RESULTDIR = "analysis_results"
OUT_CSV = "analysis_fail.csv"
root = Path(__file__).parents[1]  # 0524
data = root / DATADIR
analysis = Path(__file__).parent  # 0830
resultDir = analysis / RESULTDIR
# Expected repo layout:
# .
# ├── README.md
# ├── [d] analysis
# │   ├── [d] analysis_results
# │   │   ├── analysis_new.csv
# │   │   └── analysis.csv
# │   ├── [d] images
# │   ├── merge_0913.py
# │   ├── plot_0810_dotprod_extend.py
# ├── [d] data
# │   ├── [d] cluster_hhpai_na61-pod_hhpai_na61-0820_no_time
# │   │   ├── [d] 01-FragShare
# │   │   │   ├── [d] 0.6
# │   │   │   │   ├── [d] 42
# ......

def parse_fail_out_file_to_df(ifile):
    with open(ifile, 'r') as f:
        start_signal = 0
        out_list = []
        for line in f.readlines():
            beef = ""
            if line.startswith('log-'):
                start_signal = 1
                continue
            elif line.startswith('Failed No'):
                break
            elif start_signal == 1:
                beef = line.strip()
            else:
                continue

            # Each kept line is expected (inferred from the splits below) to look roughly like:
            #   "<num_pod>; <cpu>: <cores>, <gpu>: <n> x {<milli>}m (<type>)>"
            num_pod, request = beef.split(';')
            num_pod = int(num_pod)
            cpu_q, gpu_q = request.split(', ')
            cpu_m = int(float(cpu_q.split(':')[1].strip()) * 1000)  # CPU cores -> milli-CPU
            gpu_n = int(gpu_q.split(':')[1].split('x')[0].strip())  # number of GPUs requested
            gpu_m = int(gpu_q.split(':')[1].split('x {')[1].split('}m')[0].strip())  # GPU-milli per GPU
            gpu_t = gpu_q.split(':')[1].split('m (')[1].split(')>')[0].strip()  # requested GPU type
            gpu_t = "<none>" if len(gpu_t) == 0 else gpu_t
            result = [num_pod, cpu_m, gpu_n, gpu_m, gpu_t]
            out_list.append(result.copy())

    df = pd.DataFrame(out_list)
    if len(df) != 0:  # has failed pods
        df.columns = ['num_pod', 'cpu_milli', 'num_gpu', 'gpu_milli', 'gpu_type_req']
    return df

def exit_and_save_to_csv(dflist):
    dfo = pd.concat(dflist)
    resultDir.mkdir(exist_ok=True)
    csvfile = resultDir / OUT_CSV
    dfo.to_csv(csvfile)
    print("%d rows saved to: %s" % (len(dfo), csvfile))
    exit()

# Walk data/<workload>/<sc_policy>/<tune>/<seed>/ and merge every analysis_fail.out.
fileDirs = sorted([x for x in data.iterdir() if x.is_dir()])
dflist = []
for fdir in fileDirs:
    policyDirs = sorted([x for x in fdir.iterdir() if x.is_dir()])
    for pdir in policyDirs:
        tuneDirs = sorted([x for x in pdir.iterdir() if x.is_dir()])
        for tdir in tuneDirs:
            seedDirs = sorted([x for x in tdir.iterdir() if x.is_dir()])
            for sdir in seedDirs:
                ifile = fdir / pdir / tdir / sdir / IN_FILE
                print(ifile)
                if not ifile.is_file():
                    continue
                try:
                    dff = parse_fail_out_file_to_df(ifile)
                    fail_pod_cols = list(dff.columns)
                    dff["workload"] = fdir.name
                    dff["sc_policy"] = pdir.name
                    dff['tune'] = tdir.name
                    dff['seed'] = sdir.name
                    dff.index.names = ['order']
                    meta_cols = ['workload', 'sc_policy', 'tune', 'seed']
                    dff = dff[meta_cols + fail_pod_cols]
                    dflist.append(dff)

                    # if len(dflist) > 1:
                    #     exit_and_save_to_csv(dflist)

                except Exception as e:
                    exit("ERROR file: %s\n%s" % (ifile, e))

exit_and_save_to_csv(dflist)
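
The string surgery in parse_fail_out_file_to_df implies a particular line shape in analysis_fail.out. The self-contained sketch below runs the same splits on one synthetic line; the sample string and its field values are hypothetical and only illustrate the format the parser appears to expect.

# Hypothetical failed-pod line, shaped to match the splits in parse_fail_out_file_to_df:
#   "<num_pod>; <cpu>: <cores>, <gpu>: <n> x {<milli>}m (<type>)>"
beef = "3; cpu: 2.5, gpu: 1 x {500}m (V100)>"

num_pod, request = beef.split(';')
num_pod = int(num_pod)                                    # 3
cpu_q, gpu_q = request.split(', ')
cpu_m = int(float(cpu_q.split(':')[1].strip()) * 1000)    # 2500 milli-CPU
gpu_n = int(gpu_q.split(':')[1].split('x')[0].strip())    # 1 GPU
gpu_m = int(gpu_q.split(':')[1].split('x {')[1].split('}m')[0].strip())  # 500 GPU-milli
gpu_t = gpu_q.split(':')[1].split('m (')[1].split(')>')[0].strip()       # "V100"
print(num_pod, cpu_m, gpu_n, gpu_m, gpu_t)                # 3 2500 1 500 V100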
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import pandas as pd
2+
from pathlib import Path
3+
from utils import get_total_num_gpu
4+
5+
RESULTDIR="analysis_results"
6+
DATADIR="data"
7+
root = Path(__file__).parents[1] # 0524
8+
data = root / DATADIR
9+
analysis = Path(__file__).parent # 0830
10+
resultDir = analysis / RESULTDIR
11+
# .
12+
# ├── README.md
13+
# ├── [d] analysis
14+
# │ ├── [d] analysis_results
15+
# │ │ ├── analysis_new.csv
16+
# │ │ └── analysis.csv
17+
# │ ├── [d] images
18+
# │ ├── merge_0913.py
19+
# │ ├── plot_0810_dotprod_extend.py
20+
# ├── [d] data
21+
# │ ├── [d] cluster_hhpai_na61-pod_hhpai_na61-0820_no_time
22+
# │ │ ├── [d] 01-FragShare
23+
# │ │ │ ├── [d] 0.6
24+
# │ │ │ │ ├── [d] 42
25+
# ......
26+
27+
def exit_and_save_to_csv(dflist):
28+
dfo = pd.concat(dflist)
29+
resultDir.mkdir(exist_ok=True)
30+
csvfile = resultDir / 'analysis_frag_discrete.csv'
31+
dfo.to_csv(csvfile)
32+
print("%d rows saved to: %s" % (len(dfo), csvfile))
33+
exit()
34+
35+
fileDirs = sorted([x for x in data.iterdir() if x.is_dir()])
36+
dflist = []
37+
for fdir in fileDirs:
38+
policyDirs = sorted([x for x in fdir.iterdir() if x.is_dir()])
39+
for pdir in policyDirs:
40+
tuneDirs = sorted([x for x in pdir.iterdir() if x.is_dir()])
41+
for tdir in tuneDirs:
42+
seedDirs = sorted([x for x in tdir.iterdir() if x.is_dir()])
43+
for sdir in seedDirs:
44+
afile = fdir / pdir / tdir / sdir / 'analysis_allo.csv'
45+
ffile = fdir / pdir / tdir / sdir / 'analysis_frag.csv'
46+
print(afile)
47+
if not afile.is_file():
48+
continue
49+
try:
50+
dfa = pd.read_csv(afile)
51+
dfa.columns = [x.split('-')[-1] for x in dfa.columns]
52+
# [used_nodes, used_gpus, used_gpu_milli, total_gpus, arrived_gpu_milli]
53+
54+
total_gpu_num = dfa.total_gpus.values[0]
55+
dfa['arrive_ratio'] = dfa.arrived_gpu_milli / total_gpu_num / 10
56+
dfa['arrive_ratio'] = dfa['arrive_ratio'].apply(lambda x: round(x, 0))
57+
dfa['alloc_ratio'] = dfa.used_gpu_milli / total_gpu_num / 10
58+
dfa['alloc_ratio'] = dfa['alloc_ratio'].apply(lambda x: round(x, 2))
59+
60+
dff = pd.read_csv(ffile)
61+
dff.columns = [x.split('-')[-1] for x in dff.columns]
62+
# [origin_milli, origin_ratio, origin_q124, bellman_milli, bellman_ratio]
63+
64+
df = dfa.join(dff)
65+
66+
dfn = dict()
67+
dfn["workload"] = fdir.name
68+
dfn["sc_policy"] = pdir.name
69+
dfn['tune']= tdir.name
70+
dfn['seed'] = sdir.name
71+
72+
for arrr in range(0, 131, 1):
73+
dfv = df[df.arrive_ratio==arrr]
74+
if len(dfv) == 0:
75+
dfv = df[(df.arrive_ratio>=arrr-1)&(df.arrive_ratio<=arrr+1)]
76+
if len(dfv) == 0:
77+
# print("No data:", arrr)
78+
continue
79+
80+
frag_milli = dfv.origin_milli.mean()
81+
frag_ratio = dfv.origin_ratio.mean()
82+
# frag_milli = idle_milli * frag_ratio
83+
val = round(100 * frag_milli / 1000 / total_gpu_num, 2)
84+
dfn[arrr] = val
85+
86+
dfo = pd.DataFrame(dfn, index=[len(dflist)]).set_index(["workload", "sc_policy", "tune", "seed"])
87+
dflist.append(dfo)
88+
89+
# if len(dflist) > 1:
90+
# exit_and_save_to_csv(dflist)
91+
92+
except Exception as e:
93+
exit("ERROR file: %s\n%s" % (afile, e))
94+
95+
exit_and_save_to_csv(dflist)
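
Both the arrival-ratio discretization and the value stored per column reduce to unit conversions: arrived_gpu_milli / total_gpus / 10 expresses arrived GPU-milli as a percentage of cluster capacity (total_gpus × 1000 milli), and 100 * frag_milli / 1000 / total_gpus expresses fragmented GPU-milli as a percentage of that same capacity. A short worked sketch with made-up numbers:

# Made-up numbers, only to illustrate the unit conversions used above.
total_gpus = 120            # cluster capacity: 120 GPUs = 120,000 GPU-milli
arrived_gpu_milli = 90_000  # GPU-milli requested by pods that have arrived
frag_milli = 6_000          # fragmented GPU-milli (as in analysis_frag.csv)

arrive_ratio = arrived_gpu_milli / total_gpus / 10  # 75.0 -> 75% of capacity has arrived
frag_pct = 100 * frag_milli / 1000 / total_gpus     # 5.0  -> 5% of capacity is fragmented
print(round(arrive_ratio), frag_pct)                # 75 5.0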
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import pandas as pd
2+
from pathlib import Path
3+
from utils import get_total_num_gpu
4+
5+
RESULTDIR="analysis_results"
6+
DATADIR="data"
7+
root = Path(__file__).parents[1] # 0524
8+
data = root / DATADIR
9+
analysis = Path(__file__).parent # 0830
10+
resultDir = analysis / RESULTDIR
11+
# .
12+
# ├── README.md
13+
# ├── [d] analysis
14+
# │ ├── [d] analysis_results
15+
# │ │ ├── analysis_new.csv
16+
# │ │ └── analysis.csv
17+
# │ ├── [d] images
18+
# │ ├── merge_0913.py
19+
# │ ├── plot_0810_dotprod_extend.py
20+
# ├── [d] data
21+
# │ ├── [d] cluster_hhpai_na61-pod_hhpai_na61-0820_no_time
22+
# │ │ ├── [d] 01-FragShare
23+
# │ │ │ ├── [d] 0.6
24+
# │ │ │ │ ├── [d] 42
25+
# ......
26+
27+
def exit_and_save_to_csv(dflist):
28+
dfo = pd.concat(dflist)
29+
resultDir.mkdir(exist_ok=True)
30+
csvfile = resultDir / 'analysis_frag_ratio_discrete.csv'
31+
dfo.to_csv(csvfile)
32+
print("%d rows saved to: %s" % (len(dfo), csvfile))
33+
exit()
34+
35+
fileDirs = sorted([x for x in data.iterdir() if x.is_dir()])
36+
dflist = []
37+
for fdir in fileDirs:
38+
policyDirs = sorted([x for x in fdir.iterdir() if x.is_dir()])
39+
for pdir in policyDirs:
40+
tuneDirs = sorted([x for x in pdir.iterdir() if x.is_dir()])
41+
for tdir in tuneDirs:
42+
seedDirs = sorted([x for x in tdir.iterdir() if x.is_dir()])
43+
for sdir in seedDirs:
44+
afile = fdir / pdir / tdir / sdir / 'analysis_allo.csv'
45+
ffile = fdir / pdir / tdir / sdir / 'analysis_frag.csv'
46+
print(afile)
47+
if not afile.is_file():
48+
continue
49+
try:
50+
dfa = pd.read_csv(afile)
51+
dfa.columns = [x.split('-')[-1] for x in dfa.columns]
52+
# [used_nodes, used_gpus, used_gpu_milli, total_gpus, arrived_gpu_milli]
53+
54+
total_gpu_num = dfa.total_gpus.values[0]
55+
dfa['arrive_ratio'] = dfa.arrived_gpu_milli / total_gpu_num / 10
56+
dfa['arrive_ratio'] = dfa['arrive_ratio'].apply(lambda x: round(x, 0))
57+
dfa['alloc_ratio'] = dfa.used_gpu_milli / total_gpu_num / 10
58+
dfa['alloc_ratio'] = dfa['alloc_ratio'].apply(lambda x: round(x, 2))
59+
60+
dff = pd.read_csv(ffile)
61+
dff.columns = [x.split('-')[-1] for x in dff.columns]
62+
# [origin_milli, origin_ratio, origin_q124, bellman_milli, bellman_ratio]
63+
64+
df = dfa.join(dff)
65+
66+
dfn = dict()
67+
dfn["workload"] = fdir.name
68+
dfn["sc_policy"] = pdir.name
69+
dfn['tune']= tdir.name
70+
dfn['seed'] = sdir.name
71+
72+
for arrr in range(0, 131, 1):
73+
dfv = df[df.arrive_ratio==arrr]
74+
if len(dfv) == 0:
75+
dfv = df[(df.arrive_ratio>=arrr-1)&(df.arrive_ratio<=arrr+1)]
76+
if len(dfv) == 0:
77+
# print("No data:", arrr)
78+
continue
79+
80+
frag_milli = dfv.origin_milli.mean()
81+
frag_ratio = dfv.origin_ratio.mean()
82+
# frag_milli = idle_milli * frag_ratio
83+
dfn[arrr] = frag_ratio
84+
85+
dfo = pd.DataFrame(dfn, index=[len(dflist)]).set_index(["workload", "sc_policy", "tune", "seed"])
86+
dflist.append(dfo)
87+
88+
# if len(dflist) > 1:
89+
# exit_and_save_to_csv(dflist)
90+
91+
except Exception as e:
92+
exit("ERROR file: %s\n%s" % (afile, e))
93+
94+
exit_and_save_to_csv(dflist)
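
The fragmentation-ratio table lends itself to the same kind of downstream aggregation; a hedged sketch follows (the file path, column layout, and matplotlib plotting are assumptions for illustration, not part of this commit).

import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("analysis_results/analysis_frag_ratio_discrete.csv",
                 index_col=["workload", "sc_policy", "tune", "seed"])
ratio_cols = [c for c in df.columns if c.isdigit()]

# Mean fragmentation ratio per scheduling policy as a function of arrival ratio (%).
mean_by_policy = df[ratio_cols].groupby(level="sc_policy").mean()
for policy, row in mean_by_policy.iterrows():
    plt.plot([int(c) for c in ratio_cols], row.values, label=policy)
plt.xlabel("arrived workload (% of cluster GPU capacity)")
plt.ylabel("fragmentation ratio")
plt.legend()
plt.savefig("frag_ratio_vs_arrival.png")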
