seung-lab · xiuliren · May 13, 2016 · May 13, 2016 · May 13, 2016 · May 13, 2016
diff --git a/.travis.yml b/.travis.yml
@@ -53,19 +53,19 @@ script:
        # check patch using single precision, this check will create the net_current.h5 file for testing loading
        - cd python
        # test affinity training patch, network initialization, network with even field of view
-       - python train.py -c ../testsuit/affinity/config.cfg -d single -k yes
+       - python ztrain.py -c ../testsuit/affinity/config.cfg -d single -k yes
        # test boundary map training, patch matching, network initialization
-       - python train.py -c ../testsuit/boundary/config.cfg -d single -k yes
+       - python ztrain.py -c ../testsuit/boundary/config.cfg -d single -k yes
        # second check to test the network loading
-       - python train.py -c ../testsuit/boundary/config.cfg -d single -k yes
+       - python ztrain.py -c ../testsuit/boundary/config.cfg -d single -k yes
        # check the double precision
        # compile the core with double precision
        - cd core; make double -j 4
        # return to `python`
        - cd ..
-       - python train.py -c ../testsuit/boundary/config.cfg -d double -k yes
+       - python ztrain.py -c ../testsuit/boundary/config.cfg -d double -k yes
        # test forward pass
-       - python forward.py -c ../testsuit/forward/config.cfg
+       - python zforward.py -c ../testsuit/forward/config.cfg
        # return to root directory
        - cd ..
 

diff --git a/python/tifffile.py → python/emirt/tifffile.py b/python/tifffile.py → python/emirt/tifffile.py
diff --git a/python/front_end/__init__.py b/python/front_end/__init__.py
@@ -3,4 +3,9 @@
 import zlog
 import znetio
 import zsample
-import zshow
+import zshow
+import zutils
+import zaws
+import zcost
+import ztest
+import zcheck
diff --git a/python/front_end/zaws.py b/python/front_end/zaws.py
@@ -0,0 +1,30 @@
+import os
+
+def s3download(s3fname, tmpdir="/tmp/"):
+    """
+    download aws s3 file to a local temporary directory
+
+    params:
+    - s3fname: string, file name in s3 (a local path or None is passed through)
+    - tmpdir: string, temporary directory
+
+    return:
+    - lcfname: string, local file name (s3fname itself when it is not in s3)
+    """
+    # local import: only `os` is imported at the top of this module
+    import subprocess
+    if not s3fname or "s3://" not in s3fname:
+        # this is not a s3 file, just a local file (or nothing at all)
+        return s3fname
+    # local directory mirroring the bucket/key path under tmpdir
+    lcdir = os.path.dirname( s3fname ).replace("s3://", "")
+    lcdir = os.path.join(tmpdir, lcdir)
+    # os.makedirs raises OSError when the directory already exists,
+    # so create it only on the first download from this location
+    if not os.path.isdir(lcdir):
+        os.makedirs(lcdir)
+    # local file name: the base name placed directly under tmpdir
+    lcfname = os.path.join( tmpdir, os.path.basename(s3fname) )
+    # pass an argument list (no shell) so spaces/metacharacters are safe
+    subprocess.call(["aws", "s3", "cp", s3fname, lcfname])
+    return lcfname
diff --git a/python/zcheck.py → python/front_end/zcheck.py b/python/zcheck.py → python/front_end/zcheck.py
@@ -5,7 +5,7 @@
 """
 import os
 import numpy as np
-import utils
+import zutils
 import emirt
 
 def check_gradient(pars, net, smp, h=0.00001):
@@ -21,7 +21,7 @@ def check_gradient(pars, net, smp, h=0.00001):
 
     # numerical gradient
     # apply the transformations in memory rather than array view
-    vol_ins = utils.make_continuous(vol_ins)
+    vol_ins = zutils.make_continuous(vol_ins)
     # shift the input to compute the analytical gradient
     vol_ins1 = dict()
     vol_ins2 = dict()

diff --git a/python/front_end/zconfig.py b/python/front_end/zconfig.py
@@ -10,9 +10,10 @@
 import ConfigParser
 import numpy as np
 import os
-import cost_fn
-import utils
+import zcost
+import zutils
 from emirt import volume_util
+import zaws
 
 def parser(conf_fname):
     # parse config file to get parameters
@@ -78,11 +79,11 @@ def parse_cfg( conf_fname ):
     #TRAINING OPTIONS
     #Samples to use for training
     if config.has_option('parameters', 'train_range'):
-        pars['train_range'] = utils.parseIntSet( config.get('parameters',   'train_range') )
+        pars['train_range'] = zutils.parseIntSet( config.get('parameters',   'train_range') )
 
     #Samples to use for cross-validation
     if config.has_option('parameters', 'test_range'):
-        pars['test_range']  = utils.parseIntSet( config.get('parameters',   'test_range') )
+        pars['test_range']  = zutils.parseIntSet( config.get('parameters',   'test_range') )
     #Learning Rate
     if config.has_option('parameters', 'eta'):
         pars['eta']         = config.getfloat('parameters', 'eta')
@@ -241,7 +242,7 @@ def parse_cfg( conf_fname ):
 
     #FULL FORWARD PASS PARAMETERS
     #Which samples to use
-    pars['forward_range'] = utils.parseIntSet( config.get('parameters', 'forward_range') )
+    pars['forward_range'] = zutils.parseIntSet( config.get('parameters', 'forward_range') )
     #Which network file to load
     pars['forward_net']   = config.get('parameters', 'forward_net')
     #Output Patch Size
@@ -258,26 +259,36 @@ def autoset_pars(pars):
         # automatic choosing of cost function
         if 'boundary' in pars['out_type']:
             pars['cost_fn_str'] = 'softmax_loss'
-            pars['cost_fn'] = cost_fn.softmax_loss
+            pars['cost_fn'] = zcost.softmax_loss
         elif 'affin' in pars['out_type']:
             pars['cost_fn_str'] = 'binomial_cross_entropy'
-            pars['cost_fn'] = cost_fn.binomial_cross_entropy
+            pars['cost_fn'] = zcost.binomial_cross_entropy
         elif 'semantic' in pars['out_type']:
             pars['cost_fn_str'] = 'softmax_loss'
-            pars['cost_fn'] = cost_fn.softmax_loss
+            pars['cost_fn'] = zcost.softmax_loss
         else:
             raise NameError("no matching cost function for out_type!")
     elif "square-square" in pars['cost_fn_str']:
-        pars['cost_fn'] = cost_fn.square_square_loss
+        pars['cost_fn'] = zcost.square_square_loss
     elif "square" in pars['cost_fn_str']:
-        pars['cost_fn'] = cost_fn.square_loss
+        pars['cost_fn'] = zcost.square_loss
     elif  "binomial" in pars['cost_fn_str']:
-        pars['cost_fn'] = cost_fn.binomial_cross_entropy
+        pars['cost_fn'] = zcost.binomial_cross_entropy
     elif "softmax" in pars['cost_fn_str']:
-        pars['cost_fn'] = cost_fn.softmax_loss
+        pars['cost_fn'] = zcost.softmax_loss
     else:
         raise NameError('unknown type of cost function')
 
+    # aws s3 filehandling
+    pars['fnet_spec']  = zaws.s3download( pars['fnet_spec'] )
+    pars['fdata_spec'] = zaws.s3download( pars['fdata_spec'] )
+    # local file name
+    if "s3://" in pars['train_net_prefix']:
+        # copy the path as a backup
+        pars['s3_train_net_prefix'] = pars['train_net_prefix']
+        bn = os.path.basename( pars['train_net_prefix'] )
+        # replace with local path
+        pars['train_net_prefix'] = "/tmp/{}".format(bn)
     return pars
 
 def check_pars(pars):
@@ -353,6 +364,9 @@ def autoset_dspec(pars, dspec):
 
 # parse args
 def parse_args(args):
+    # s3 to local
+    args['config'] = zaws.s3download( args['config'] )
+    args['seed'] = zaws.s3download( args['seed'] )
     #%% parameters
     if not os.path.exists( args['config'] ):
         raise NameError("config file not exist!")

diff --git a/python/cost_fn.py → python/front_end/zcost.py b/python/cost_fn.py → python/front_end/zcost.py
@@ -5,7 +5,7 @@
 """
 import numpy as np
 import emirt
-import utils
+import zutils
 from core import pyznn
 
 def get_cls(props, lbls, mask=None):
@@ -25,8 +25,8 @@ def get_cls(props, lbls, mask=None):
     c = 0.0
 
     #Applying mask if it exists
-    props = utils.mask_dict_vol(props, mask)
-    lbls = utils.mask_dict_vol(lbls, mask)
+    props = zutils.mask_dict_vol(props, mask)
+    lbls = zutils.mask_dict_vol(lbls, mask)
 
     for name, prop in props.iteritems():
         lbl = lbls[name]
@@ -53,8 +53,8 @@ def square_loss(props, lbls, mask=None):
     err = 0
 
     #Applying mask if it exists
-    props = utils.mask_dict_vol(props, mask)
-    lbls = utils.mask_dict_vol(lbls, mask)
+    props = zutils.mask_dict_vol(props, mask)
+    lbls = zutils.mask_dict_vol(lbls, mask)
 
     for name, prop in props.iteritems():
         lbl = lbls[name]
@@ -74,8 +74,8 @@ def square_square_loss(props, lbls, mask=None, margin=0.2):
     error = 0
 
     #Applying mask if it exists
-    props = utils.mask_dict_vol(props, mask)
-    lbls = utils.mask_dict_vol(lbls, mask)
+    props = zutils.mask_dict_vol(props, mask)
+    lbls = zutils.mask_dict_vol(lbls, mask)
 
     for name, propagation in props.iteritems():
         lbl = lbls[name]
@@ -120,8 +120,8 @@ def binomial_cross_entropy(props, lbls, mask=None):
         entropy[name] = -lbl*np.log(prop) - (1-lbl)*np.log(1-prop)
 
     #Applying mask if it exists
-    grdts = utils.mask_dict_vol(grdts, mask)
-    entropy = utils.mask_dict_vol(entropy, mask)
+    grdts = zutils.mask_dict_vol(grdts, mask)
+    entropy = zutils.mask_dict_vol(entropy, mask)
 
     for name, vol in entropy.iteritems():
         err += np.sum( vol )
@@ -186,7 +186,7 @@ def multinomial_cross_entropy(props, lbls, mask=None):
         entropy[name] = -lbl * np.log(prop)
 
     #Applying mask if it exists
-    entropy = utils.mask_dict_vol(entropy, mask)
+    entropy = zutils.mask_dict_vol(entropy, mask)
 
     for name, vol in entropy.iteritems():
         cost += np.sum( vol )
@@ -399,7 +399,7 @@ def get_grdt(pars, history, props, lbl_outs, msks, wmsks, vn):
       #  history['re']  += pyznn.get_rand_error( props.values(), lbl_outs.values() )
        # print  're: {}'.format( history['re'] )
 
-    num_mask_voxels = utils.sum_over_dict(msks)
+    num_mask_voxels = zutils.sum_over_dict(msks)
     if num_mask_voxels > 0:
         history['err'] += cerr / num_mask_voxels
         history['cls'] += get_cls(props, lbl_outs) / num_mask_voxels
@@ -408,30 +408,30 @@ def get_grdt(pars, history, props, lbl_outs, msks, wmsks, vn):
         history['cls'] += get_cls(props, lbl_outs) / vn
 
     if pars['is_debug']:
-        c2 = utils.check_dict_nan(lbl_outs)
-        c3 = utils.check_dict_nan(msks)
-        c4 = utils.check_dict_nan(wmsks)
-        c5 = utils.check_dict_nan(props)
-        c6 = utils.check_dict_nan(grdts)
+        c2 = zutils.check_dict_nan(lbl_outs)
+        c3 = zutils.check_dict_nan(msks)
+        c4 = zutils.check_dict_nan(wmsks)
+        c5 = zutils.check_dict_nan(props)
+        c6 = zutils.check_dict_nan(grdts)
         if  not ( c2 and c3 and c4 and c5 and c6):
             # stop training
             raise NameError('nan encountered!')
 
     # gradient reweighting
-    grdts = utils.dict_mul( grdts, msks  )
+    grdts = zutils.dict_mul( grdts, msks  )
     if pars['rebalance_mode']:
-        grdts = utils.dict_mul( grdts, wmsks )
+        grdts = zutils.dict_mul( grdts, wmsks )
 
     if pars['is_malis'] :
         malis_weights, rand_errors, num_non_bdr = cost_fn.malis_weight(pars, props, lbl_outs)
-        grdts = utils.dict_mul(grdts, malis_weights)
-        dmc, dme = utils.get_malis_cost( props, lbl_outs, malis_weights )
+        grdts = zutils.dict_mul(grdts, malis_weights)
+        dmc, dme = zutils.get_malis_cost( props, lbl_outs, malis_weights )
         if num_mask_voxels > 0:
             history['mc'] += dmc.values()[0] / num_mask_voxels
             history['me'] += dme.values()[0] / num_mask_voxels
         else:
             history['mc'] += dmc.values()[0] / vn
             history['me'] += dme.values()[0] / vn
 
-    grdts = utils.make_continuous(grdts)
+    grdts = zutils.make_continuous(grdts)
     return props, grdts, history
diff --git a/python/front_end/zdataset.py b/python/front_end/zdataset.py
@@ -10,7 +10,7 @@
 import sys
 import numpy as np
 import emirt
-import utils
+import zutils
 
 class CDataset(object):
 
@@ -383,7 +383,7 @@ def __init__(self, dspec, pars, sec_name, \
         if pars['is_bd_mirror']:
             if self.pars['is_debug']:
                 print "data shape before mirror: ", self.data.shape
-            self.data = utils.boundary_mirror(self.data, self.mapsz)
+            self.data = zutils.boundary_mirror(self.data, self.mapsz)
             #Modifying the deviation boundaries for the modified dataset
             self.calculate_sizes( )
             if self.pars['is_debug']:
@@ -450,9 +450,6 @@ def __init__(self, dspec, pars, sec_name, outsz, setsz, mapsz ):
         self.sublbl = None
         self.submsk = None
 
-        # rename data as lbl
-        self.lbl = self.data
-
         # deal with mask
         self.msk = np.array([])
         if dspec[sec_name].has_key('fmasks'):
@@ -498,7 +495,10 @@ def get_dataset(self):
         return the whole label for examination
         """
         return self.data
-
+    def get_lbl(self):  # label accessor: returns self.data, the same object get_dataset() returns
+        return self.data
+    def get_msk(self):  # mask accessor: returns self.msk as stored, without copying
+        return self.msk
     def get_candidate_loc( self, low, high ):
         """
         find the candidate location of subvolume

diff --git a/python/front_end/zlog.py b/python/front_end/zlog.py
@@ -10,8 +10,7 @@
 import ConfigParser
 import numpy as np
 import os
-import cost_fn
-import utils
+import zutils
 
 from emirt import volume_util
 
@@ -28,7 +27,7 @@ def record_config_file(params=None, config_filename=None, net_save_filename=None
 
     #Need to specify either a params object, or all of the other optional args
     #"ALL" optional args excludes train
-    utils.assert_arglist(params,
+    zutils.assert_arglist(params,
         [config_filename, net_save_filename]
         )
 
@@ -57,7 +56,7 @@ def record_config_file(params=None, config_filename=None, net_save_filename=None
 
     #Deriving destination filename information
     if timestamp is None:
-        timestamp = utils.timestamp()
+        timestamp = zutils.timestamp()
     mode = "train" if train else "forward"
 
     #Actually saving
@@ -76,7 +75,7 @@ def make_logfile_name(params=None, net_save_filename=None, timestamp = None, tra
     '''
 
     #Need to specify either a params object, or the net save prefix
-    utils.assert_arglist(params,
+    zutils.assert_arglist(params,
         [net_save_filename])
 
     if params is not None:
@@ -94,7 +93,7 @@ def make_logfile_name(params=None, net_save_filename=None, timestamp = None, tra
     assert(save_prefix_valid)
 
     if timestamp is None:
-        timestamp = utils.timestamp()
+        timestamp = zutils.timestamp()
     mode = "train" if train else "forward"
 
     directory_name = os.path.dirname( save_prefix )