Skip to content

Commit 01a36d6

Browse files
committed
Update code
1 parent b111dc9 commit 01a36d6

File tree

21 files changed

+593
-75
lines changed

21 files changed

+593
-75
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,10 @@ if(WITH_TIMEOUT_WARNING)
148148
set(SALUS_ENABLE_TIMEOUT_WARNING 1)
149149
endif(WITH_TIMEOUT_WARNING)
150150

151+
if(USE_TENSORFLOW)
152+
set(SALUS_ENABLE_TENSORFLOW 1)
153+
endif(USE_TENSORFLOW)
154+
151155
configure_file(src/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
152156
include_directories(${CMAKE_CURRENT_BINARY_DIR})
153157

benchmarks/driver/runner.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,8 @@ def __call__(self, executor, output_file):
213213
else:
214214
output_file.parent.mkdir(exist_ok=True, parents=True)
215215
with output_file.open('w') as f:
216-
return execute(cmd, cwd=str(cwd), env=env, stdout=f, stderr=sp.STDOUT)
216+
# return execute(cmd, cwd=str(cwd), env=env, stdout=f, stderr=sp.STDOUT)
217+
return execute(cmd, cwd=str(cwd), env=env, stdout=f, stderr=None)
217218

218219
def _construct_test_name(self, executor):
219220
# type: (Executor) -> Tuple[str, str]
@@ -239,7 +240,7 @@ def _construct_test_name(self, executor):
239240
})
240241
}
241242

242-
variable_batch_size_models = {'vae', 'superres'}
243+
variable_batch_size_models = {'vae', 'superres', 'seq2seq', 'mnistsf', 'mnistcv', 'mnistlg'}
243244
if remove_suffix(self.wl.name, 'eval') not in variable_batch_size_models:
244245
if self.wl.batch_size not in self.wl.wtl.available_batch_sizes():
245246
raise ValueError(f"Batch size `{self.wl.batch_size}' is not supported for {self.wl.name},"
@@ -273,6 +274,8 @@ def _construct_test_name(self, executor):
273274
}
274275

275276
postfix = names.get(self.wl.batch_size, '0')
277+
if model_name == 'seq2seq' and postfix == '0':
278+
postfix = '2_large'
276279

277280
method = f'{cls}.{prefix}{postfix}'
278281
return pkg, method

benchmarks/driver/server/config/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from builtins import super
2222

2323
from absl import flags
24-
from copy import copy
24+
from copy import deepcopy
2525

2626
from ...utils import maybe_path
2727
from ...utils.compatiblity import pathlib
@@ -77,7 +77,7 @@ def __setattr__(self, key, value):
7777
def copy(self, **kwargs):
7878
# type: (...) -> SalusConfig
7979
"""Return a new copy of the tuple"""
80-
return copy(self).update(**kwargs)
80+
return deepcopy(self).update(**kwargs)
8181

8282
def update(self, d=None, **kwargs):
8383
# type: (...) -> SalusConfig

benchmarks/exps/__init__.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
import time
2424
import re
2525
import logging
26+
import string
27+
import random
2628
from absl import flags
2729
from typing import Union, Iterable, List, TypeVar, Callable, Optional
2830

@@ -379,3 +381,8 @@ def release_on_pipe(pipe):
379381
def sync_on_pipe(pipe):
    """Synchronize on `pipe`: wait on it first, then release it.

    This is simply `wait_on_pipe(pipe)` followed by `release_on_pipe(pipe)`;
    the order of the two calls is significant and must be kept.
    """
    wait_on_pipe(pipe)
    release_on_pipe(pipe)
384+
385+
386+
def random_id(size=6, chars=string.ascii_uppercase + string.digits):
    """Return a random identifier string.

    The result has `size` characters, each drawn independently with
    `random.choice` from `chars` (by default the uppercase ASCII letters
    plus the decimal digits). A `size` of zero yields the empty string.

    The `random` module is not suitable for security-sensitive tokens; use
    this only for things like run or file name suffixes.
    """
    picked = [random.choice(chars) for _ in range(size)]
    return ''.join(picked)

benchmarks/exps/smtracing.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,31 @@ def tfmps(argv):
170170
)
171171

172172

173+
def train_alone(argv):
    """Run the training workload alone, once per SM factor, to record SM usage.

    `argv` holds the SM factors to try; each entry is converted with
    `float()`. When `argv` is empty, the factors 1.0, 1.5, 2.0, 2.5 and 3.0
    are used.

    For every factor a fresh temporary directory is created, the
    `presets.OpTracing` config is obtained through `maybe_forced_preset`, its
    `logconf` is set to 'smtracing' and `--sm-factor <factor>` (two decimal
    places) is appended to its extra arguments. Output is written under
    `FLAGS.save_dir / "alone" / "<factor>"`.
    """
    sm_factors = [float(arg) for arg in argv] or [1.0, 1.5, 2.0, 2.5, 3.0]

    logger.info(f"Running Salus with sm factors: {sm_factors}")

    # One Salus run per factor, each with its own scratch directory.
    for factor in sm_factors:
        with tempfile.TemporaryDirectory() as td:
            scfg = maybe_forced_preset(presets.OpTracing)
            scfg.logconf = 'smtracing'
            # NOTE(review): `+=` extends the list held by the config returned
            # from maybe_forced_preset -- confirm that object is not shared
            # between iterations, otherwise the flags would accumulate.
            scfg.extra_args += ['--sm-factor', f'{factor:.2f}']
            logger.info(f"Running Salus with sm factor: {factor}")

            # The training job; `pipe` is what the two run steps below
            # wait on and then release.
            wl, pipe = create_train(Executor.Salus, 0, td)

            run_cfg = scfg.copy(output_dir=FLAGS.save_dir / "alone" / f"{factor:.2f}")
            wait_step = RunFn(lambda *_args, **_kwargs: wait_on_pipe(pipe))
            release_step = RunFn(lambda *_args, **_kwargs: release_on_pipe(pipe))
            run_seq(run_cfg, wl, wait_step, release_step)
196+
197+
173198
@case_switch_main
174199
def main():
175-
return salus, tfmps
200+
return salus, tfmps, train_alone, salus_factor

0 commit comments

Comments
 (0)