@@ -101,12 +101,42 @@ def __init__(self,
101
101
102
102
@property
103
103
def model (self ) -> Namespace :
104
+ """Get configuration namespace of the experiment.
105
+
106
+ The `Experiment` stores the configuration recipe in `self.config`.
107
+ When the configuration is built (meaning the classes referenced from
108
+ the config file are instantiated), it is saved in the `model` property
109
+ of the experiment.
110
+
111
+ Returns:
112
+ The built namespace config object.
113
+
114
+ Raises:
115
+ `RuntimeError` when the configuration model has not been built.
116
+ """
104
117
if self ._model is None :
105
118
raise RuntimeError ("Experiment argument model not initialized" )
106
119
107
120
return self ._model
108
121
109
122
def _bless_graph_executors (self ) -> None :
123
+ """Pre-compute the tensors referenced by the graph executors.
124
+
125
+ Due to the lazy nature of the computational graph related components,
126
+ nothing is actually added to the graph until it is "blessed" (
127
+ referenced, and therefore, executed).
128
+
129
+ "Blessing" is usually implemented in the form of a log or a debug call
130
+ with the blessed tensor as parameter. Referencing a `Tensor` causes the
131
+ whole computational graph that is needed to evaluate the tensor to be
132
+ built.
133
+
134
+ This function "blesses" all tensors that could potentially be used
135
+ by accessing the `fetches` property of the provided runner objects.
136
+
137
+ If the experiment runs in the training mode, this function also
138
+ blesses the tensors fetched by the trainer(s).
139
+ """
110
140
log ("Building TF Graph" )
111
141
if hasattr (self .model , "trainer" ):
112
142
if isinstance (self .model .trainer , List ):
@@ -115,13 +145,31 @@ def _bless_graph_executors(self) -> None:
115
145
trainers = [self .model .trainer ]
116
146
117
147
for trainer in trainers :
118
- debug ("Trainer fetches: {}" .format (trainer .fetches ))
148
+ debug ("Trainer fetches: {}" .format (trainer .fetches ), "bless" )
119
149
120
150
for runner in self .model .runners :
121
- debug ("Runner fetches: {}" .format (runner .fetches ))
151
+ debug ("Runner fetches: {}" .format (runner .fetches ), "bless" )
122
152
log ("TF Graph built" )
123
153
124
154
def build_model (self ) -> None :
155
+ """Build the configuration and the computational graph.
156
+
157
+ This function is invoked by all of the main entrypoints of the
158
+ `Experiment` class (`train`, `evaluate`, `run`). It manages the
159
+ building of the TensorFlow graph.
160
+
161
+ The building procedure is executed as follows:
162
+ 1. Random seeds are set.
163
+ 2. Configuration is built (instantiated) and normalized.
164
+ 3. TODO(tf-data) tf.data.Dataset instance is created and registered
165
+ in the model parts. (This is not implemented yet!)
166
+ 4. Graph executors are "blessed". This causes the rest of the TF Graph
167
+ to be built.
168
+ 5. Sessions are initialized using the TF Manager object.
169
+
170
+ Raises:
171
+ `RuntimeError` when the model is already built.
172
+ """
125
173
if self ._model_built :
126
174
raise RuntimeError ("build_model() called twice" )
127
175
@@ -163,6 +211,15 @@ def build_model(self) -> None:
163
211
self ._check_unused_initializers ()
164
212
165
213
def train (self ) -> None :
214
+ """Train model specified by this experiment.
215
+
216
+ This function is one of the main functions (entrypoints) called on
217
+ the experiment. It builds the model (if needed) and runs the training
218
+ procedure.
219
+
220
+ Raises:
221
+ `RuntimeError` when the experiment is not intended for training.
222
+ """
166
223
if not self .train_mode :
167
224
raise RuntimeError ("train() was called, but the experiment was "
168
225
"created with train_mode=False" )
@@ -208,6 +265,14 @@ def train(self) -> None:
208
265
self ._vars_loaded = True
209
266
210
267
def load_variables (self , variable_files : List [str ] = None ) -> None :
268
+ """Load variables from files.
269
+
270
+ Arguments:
271
+ variable_files: A list of checkpoint file prefixes. A TF checkpoint
272
+ is usually three files with a common prefix. This list should
273
+ have the same number of files as there are sessions in the
274
+ `tf_manager` object.
275
+ """
211
276
if not self ._model_built :
212
277
self .build_model ()
213
278
0 commit comments