
Commit b4ea78c

Merge pull request #778 from ufal/tf-data-2b
Towards TF dataset, part II
2 parents 8515d6c + c810d0b commit b4ea78c


74 files changed (+3664, -1075 lines)

neuralmonkey/attention/combination.py

Lines changed: 45 additions & 27 deletions

@@ -24,6 +24,7 @@
 from neuralmonkey.attention.namedtuples import HierarchicalLoopState
 from neuralmonkey.checking import assert_shape
 from neuralmonkey.decorators import tensor
+from neuralmonkey.logging import debug
 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.model.parameterized import InitializerSpecs
 from neuralmonkey.tf_utils import get_variable
@@ -138,53 +139,66 @@ def __init__(self,
             load_checkpoint=load_checkpoint,
             initializers=initializers)
         self._encoders = encoders
+    # pylint: enable=too-many-arguments

-        # pylint: disable=protected-access
-        self._encoders_tensors = [
-            get_attention_states(e) for e in self._encoders]
-        self._encoders_masks = [get_attention_mask(e) for e in self._encoders]
-        # pylint: enable=protected-access
+    @tensor
+    def _encoders_tensors(self) -> List[tf.Tensor]:
+        tensors = [get_attention_states(e) for e in self._encoders]
+        for e_t in tensors:
+            assert_shape(e_t, [-1, -1, -1])
+        return tensors

-        for e_m in self._encoders_masks:
+    @tensor
+    def _encoders_masks(self) -> List[tf.Tensor]:
+        masks = [get_attention_mask(e) for e in self._encoders]
+        for e_m in masks:
             assert_shape(e_m, [-1, -1])

-        for e_t in self._encoders_tensors:
-            assert_shape(e_t, [-1, -1, -1])
+        if self._use_sentinels:
+            masks.append(tf.ones([tf.shape(masks[0])[0], 1]))
+        return masks

-        with self.use_scope():
-            self.encoder_projections_for_logits = \
-                self.get_encoder_projections("logits_projections")
+    @tensor
+    def encoder_projections_for_logits(self) -> List[tf.Tensor]:
+        return self.get_encoder_projections("logits_projections")

-            self.encoder_attn_biases = [
-                get_variable(name="attn_bias_{}".format(i),
-                             shape=[],
+    @tensor
+    def encoder_attn_biases(self) -> List[tf.Variable]:
+        return [get_variable(name="attn_bias_{}".format(i), shape=[],
                              initializer=tf.zeros_initializer())
                 for i in range(len(self._encoders_tensors))]

-            if self._share_projections:
-                self.encoder_projections_for_ctx = \
-                    self.encoder_projections_for_logits
-            else:
-                self.encoder_projections_for_ctx = \
-                    self.get_encoder_projections("context_projections")
-
-            if self._use_sentinels:
-                self._encoders_masks.append(
-                    tf.ones([tf.shape(self._encoders_masks[0])[0], 1]))
+    @tensor
+    def encoder_projections_for_ctx(self) -> List[tf.Tensor]:
+        if self._share_projections:
+            return self.encoder_projections_for_logits
+        return self.get_encoder_projections("context_projections")

-            self.masks_concat = tf.concat(self._encoders_masks, 1)
-        # pylint: enable=too-many-arguments
+    @tensor
+    def masks_concat(self) -> tf.Tensor:
+        return tf.concat(self._encoders_masks, 1)

     def initial_loop_state(self) -> AttentionLoopState:

+        # Similarly to the feed_forward attention, we need to build the encoder
+        # projections and masks before the while loop is entered so they are
+        # not created as a part of the loop
+
+        # pylint: disable=not-an-iterable
+        for val in self.encoder_projections_for_logits:
+            debug(val, "bless")
+        debug(self.masks_concat, "bless")
+
         length = sum(tf.shape(s)[1] for s in self._encoders_tensors)
+        # pylint: enable=not-an-iterable
+
         if self._use_sentinels:
             length += 1

         return empty_attention_loop_state(self.batch_size, length,
                                           self.context_vector_size)

-    def get_encoder_projections(self, scope):
+    def get_encoder_projections(self, scope: str) -> List[tf.Tensor]:
         encoder_projections = []
         with tf.variable_scope(scope):
             for i, encoder_tensor in enumerate(self._encoders_tensors):
@@ -216,9 +230,11 @@ def get_encoder_projections(self, scope):
                 encoder_projections.append(projection)
         return encoder_projections

+    # pylint: disable=unsubscriptable-object
     @property
     def context_vector_size(self) -> int:
         return self.encoder_projections_for_ctx[0].get_shape()[2].value
+    # pylint: enable=unsubscriptable-object

     # pylint: disable=too-many-locals
     def attention(self,
@@ -280,6 +296,7 @@ def attention(self,
         return contexts, next_loop_state
     # pylint: enable=too-many-locals

+    # pylint: disable=not-an-iterable,unsubscriptable-object
     def _tile_encoders_for_beamsearch(self, projected_sentinel):
         sentinel_batch_size = tf.shape(projected_sentinel)[0]
         encoders_batch_size = tf.shape(
@@ -293,6 +310,7 @@ def _tile_encoders_for_beamsearch(self, projected_sentinel):

         return [tf.tile(proj, [beam_size, 1, 1])
                 for proj in self.encoder_projections_for_ctx]
+    # pylint: enable=not-an-iterable,unsubscriptable-object

     def _renorm_softmax(self, logits):
         """Renormalized softmax wrt. attention mask."""

neuralmonkey/attention/coverage.py

Lines changed: 13 additions & 6 deletions

@@ -10,8 +10,10 @@

 from neuralmonkey.attention.base_attention import Attendable
 from neuralmonkey.attention.feed_forward import Attention
+from neuralmonkey.decorators import tensor
 from neuralmonkey.model.model_part import ModelPart
 from neuralmonkey.model.parameterized import InitializerSpecs
+from neuralmonkey.tf_utils import get_variable


 class CoverageAttention(Attention):
@@ -30,17 +32,22 @@ def __init__(self,
         Attention.__init__(self, name, encoder, dropout_keep_prob, state_size,
                            reuse, save_checkpoint, load_checkpoint,
                            initializers)
-
         self.max_fertility = max_fertility
+    # pylint: enable=too-many-arguments

-        self.coverage_weights = tf.get_variable(
-            "coverage_matrix", [1, 1, 1, self.state_size])
-        self.fertility_weights = tf.get_variable(
+    @tensor
+    def coverage_weights(self) -> tf.Variable:
+        return get_variable("coverage_matrix", [1, 1, 1, self.state_size])
+
+    @tensor
+    def fertility_weights(self) -> tf.Variable:
+        return get_variable(
             "fertility_matrix", [1, 1, self.context_vector_size])

-        self.fertility = 1e-8 + self.max_fertility * tf.sigmoid(
+    @tensor
+    def fertility(self) -> tf.Tensor:
+        return 1e-8 + self.max_fertility * tf.sigmoid(
             tf.reduce_sum(self.fertility_weights * self.attention_states, [2]))
-        # pylint: enable=too-many-arguments

     def get_energies(self, y: tf.Tensor, weights_in_time: tf.Tensor):
         weight_sum = tf.cond(
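For orientation, the fertility property above multiplies fertility_weights of shape [1, 1, context_vector_size] with attention_states of shape [batch, time, context_vector_size], sums over the last axis, and squashes the result through a sigmoid scaled by max_fertility, giving one fertility value per source position. A small NumPy sketch of the same arithmetic with made-up shapes:

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    batch, time, dim = 2, 5, 8          # hypothetical sizes
    max_fertility = 3.0

    attention_states = np.random.randn(batch, time, dim)
    fertility_weights = np.random.randn(1, 1, dim)

    # Reduce over the feature axis, then squash: result shape [batch, time].
    fertility = 1e-8 + max_fertility * sigmoid(
        np.sum(fertility_weights * attention_states, axis=2))
    assert fertility.shape == (batch, time)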

neuralmonkey/attention/scaled_dot_product.py

Lines changed: 25 additions & 10 deletions

@@ -12,12 +12,13 @@
 import tensorflow as tf
 from typeguard import check_argument_types

-from neuralmonkey.nn.utils import dropout
-from neuralmonkey.model.model_part import ModelPart
-from neuralmonkey.model.parameterized import InitializerSpecs
 from neuralmonkey.attention.base_attention import (
     BaseAttention, Attendable, get_attention_states, get_attention_mask)
 from neuralmonkey.attention.namedtuples import MultiHeadLoopState
+from neuralmonkey.decorators import tensor
+from neuralmonkey.model.model_part import ModelPart
+from neuralmonkey.model.parameterized import InitializerSpecs
+from neuralmonkey.nn.utils import dropout


 def split_for_heads(x: tf.Tensor, n_heads: int, head_dim: int) -> tf.Tensor:
@@ -263,23 +264,35 @@ def __init__(self,
         self.n_heads = n_heads
         self.dropout_keep_prob = dropout_keep_prob

+        self.keys_encoder = keys_encoder
+
+        if values_encoder is not None:
+            self.values_encoder = values_encoder
+        else:
+            self.values_encoder = self.keys_encoder
+
         if self.n_heads <= 0:
             raise ValueError("Number of heads must be greater than zero.")

         if self.dropout_keep_prob <= 0.0 or self.dropout_keep_prob > 1.0:
             raise ValueError("Dropout keep prob must be inside (0,1].")

-        if values_encoder is None:
-            values_encoder = keys_encoder
-
-        self.attention_keys = get_attention_states(keys_encoder)
-        self.attention_mask = get_attention_mask(keys_encoder)
-        self.attention_values = get_attention_states(values_encoder)
-
         self._variable_scope.set_initializer(tf.variance_scaling_initializer(
             mode="fan_avg", distribution="uniform"))
     # pylint: enable=too-many-arguments

+    @tensor
+    def attention_keys(self) -> tf.Tensor:
+        return get_attention_states(self.keys_encoder)
+
+    @tensor
+    def attention_mask(self) -> tf.Tensor:
+        return get_attention_mask(self.keys_encoder)
+
+    @tensor
+    def attention_values(self) -> tf.Tensor:
+        return get_attention_states(self.values_encoder)
+
     def attention(self,
                   query: tf.Tensor,
                   decoder_prev_state: tf.Tensor,
@@ -346,9 +359,11 @@ def finalize_loop(self, key: str,
             head_weights = last_loop_state.head_weights[i]
             self.histories["{}_head{}".format(key, i)] = head_weights

+    # pylint: disable=no-member
     @property
     def context_vector_size(self) -> int:
         return self.attention_values.get_shape()[-1].value
+    # pylint: enable=no-member

     def visualize_attention(self, key: str, max_outputs: int = 16) -> None:
         for i in range(self.n_heads):
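The hunk above only moves the key, value, and mask tensors behind lazy @tensor properties; the multi-head mechanics themselves (split_for_heads in the surrounding context) are untouched. As a reminder of what that helper conventionally does, here is a hedged sketch of the usual reshape-and-transpose head split (an illustration, not necessarily the exact neuralmonkey implementation):

    import tensorflow as tf

    def split_for_heads_sketch(x: tf.Tensor, n_heads: int,
                               head_dim: int) -> tf.Tensor:
        """Split [batch, time, n_heads * head_dim] into
        [batch, n_heads, time, head_dim]."""
        batch = tf.shape(x)[0]
        time = tf.shape(x)[1]
        split = tf.reshape(x, [batch, time, n_heads, head_dim])
        return tf.transpose(split, [0, 2, 1, 3])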

neuralmonkey/checking.py

Lines changed: 1 addition & 1 deletion

@@ -47,7 +47,7 @@ def check_dataset_and_coders(dataset: Dataset,
     missing = []

     for (serie, coder) in data_list:
-        if not dataset.has_series(serie):
+        if serie not in dataset:
             log("dataset {} does not have serie {}".format(
                 dataset.name, serie))
             missing.append((coder, serie))

neuralmonkey/dataset.py

Lines changed: 5 additions & 5 deletions

@@ -467,11 +467,7 @@ def __len__(self) -> int:
         assert self.length is not None
         return self.length

-    @property
-    def series(self) -> List[str]:
-        return list(sorted(self.iterators.keys()))
-
-    def has_series(self, name: str) -> bool:
+    def __contains__(self, name: str) -> bool:
         """Check if the dataset contains a series of a given name.

         Arguments:
@@ -482,6 +478,10 @@ def has_series(self, name: str) -> bool:
         """
         return name in self.iterators

+    @property
+    def series(self) -> List[str]:
+        return list(sorted(self.iterators.keys()))
+
     def get_series(self, name: str) -> Iterator:
         """Get the data series with a given name.
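Replacing has_series with __contains__ lets callers use Python's native membership operator, which is exactly what the checking.py hunk above switches to. A self-contained toy illustration of the protocol (TinyDataset is a made-up stand-in, not the real neuralmonkey Dataset):

    from typing import Dict, Iterator, List

    class TinyDataset:
        """Minimal stand-in illustrating the membership protocol."""

        def __init__(self, iterators: Dict[str, List]) -> None:
            self.iterators = iterators

        def __contains__(self, name: str) -> bool:
            return name in self.iterators

        @property
        def series(self) -> List[str]:
            return list(sorted(self.iterators.keys()))

        def get_series(self, name: str) -> Iterator:
            return iter(self.iterators[name])

    dataset = TinyDataset({"source": [["a", "b"]], "target": [["c"]]})
    assert "source" in dataset       # replaces dataset.has_series("source")
    assert dataset.series == ["source", "target"]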

neuralmonkey/decoders/autoregressive.py

Lines changed: 16 additions & 9 deletions

@@ -5,7 +5,7 @@
 The autoregressive decoder uses the while loop to get the outputs.
 Descendants should only specify the initial state and the while loop body.
 """
-from typing import NamedTuple, Callable, Tuple, Optional, Any, List
+from typing import NamedTuple, Callable, Tuple, Optional, Any, List, Dict

 import numpy as np
 import tensorflow as tf
@@ -19,7 +19,9 @@
 from neuralmonkey.model.sequence import EmbeddedSequence
 from neuralmonkey.nn.utils import dropout
 from neuralmonkey.tf_utils import get_variable, get_state_shape_invariants
-from neuralmonkey.vocabulary import Vocabulary, START_TOKEN, UNK_TOKEN_INDEX
+from neuralmonkey.vocabulary import (
+    Vocabulary, START_TOKEN, UNK_TOKEN_INDEX, START_TOKEN_INDEX,
+    PAD_TOKEN_INDEX)


 class LoopState(NamedTuple(
@@ -177,19 +179,25 @@ def embedding_size(self) -> int:

         return self.embeddings_source.embedding_matrix.get_shape()[1].value

-    # pylint: disable=no-self-use
     @tensor
     def go_symbols(self) -> tf.Tensor:
-        return tf.placeholder(tf.int32, [None], "go_symbols")
+        return tf.fill([self.batch_size], START_TOKEN_INDEX)
+
+    @property
+    def input_types(self) -> Dict[str, tf.DType]:
+        return {self.data_id: tf.int32}
+
+    @property
+    def input_shapes(self) -> Dict[str, tf.TensorShape]:
+        return {self.data_id: tf.TensorShape([None, None])}

     @tensor
     def train_inputs(self) -> tf.Tensor:
-        return tf.placeholder(tf.int32, [None, None], "train_inputs")
+        return self.dataset[self.data_id]

     @tensor
     def train_mask(self) -> tf.Tensor:
-        return tf.placeholder(tf.float32, [None, None], "train_mask")
-    # pylint: enable=no-self-use
+        return tf.to_float(tf.not_equal(self.train_inputs, PAD_TOKEN_INDEX))

     @tensor
     def decoding_w(self) -> tf.Variable:
@@ -479,12 +487,11 @@ def feed_dict(self, dataset: Dataset, train: bool = False) -> FeedDict:
         if sentences is not None:
             sentences_list = list(sentences)
             # train_mode=False, since we don't want to <unk>ize target words!
-            inputs, weights = self.vocabulary.sentences_to_tensor(
+            inputs, _ = self.vocabulary.sentences_to_tensor(
                 sentences_list, self.max_output_len, train_mode=False,
                 add_start_symbol=False, add_end_symbol=True,
                 pad_to_max_len=False)

             fd[self.train_inputs] = inputs
-            fd[self.train_mask] = weights

         return fd
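With this change the training mask is no longer fed through feed_dict; it is derived in-graph from the padded train_inputs by comparing against PAD_TOKEN_INDEX. A small TF 1.x sketch of the same computation (assuming, for illustration only, that the padding index is 0):

    import tensorflow as tf

    PAD_TOKEN_INDEX = 0  # assumed value for this illustration

    # A padded batch of token ids, shape [batch, time].
    train_inputs = tf.constant([[5, 7, 2, 0, 0],
                                [3, 0, 0, 0, 0]])

    # 1.0 for real tokens, 0.0 for padding; mirrors the new train_mask.
    train_mask = tf.to_float(tf.not_equal(train_inputs, PAD_TOKEN_INDEX))

    with tf.Session() as sess:
        print(sess.run(train_mask))
        # [[1. 1. 1. 0. 0.]
        #  [1. 0. 0. 0. 0.]]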

neuralmonkey/decoders/beam_search_decoder.py

Lines changed: 19 additions & 5 deletions

@@ -21,7 +21,10 @@
 """
 # pylint: disable=too-many-lines
 # Maybe move the definitions of the named tuple structures to a separate file?
-from typing import NamedTuple, List, Callable, Any
+from typing import Any, Callable, List, NamedTuple
+# pylint: disable=unused-import
+from typing import Optional
+# pylint: enable=unused-import

 import tensorflow as tf
 from typeguard import check_argument_types
@@ -157,13 +160,16 @@ def __init__(self,
         # Create a placeholder for maximum number of steps that is necessary
         # during ensembling, when the decoder is called repetitively with the
         # max_steps attribute set to one.
-        self.max_steps = tf.placeholder_with_default(max_steps, [])
+        self.max_steps = tf.placeholder_with_default(self.max_steps_int, [])

+        self._initial_loop_state = None  # type: Optional[BeamSearchLoopState]
+
+    @tensor
+    def outputs(self) -> tf.Tensor:
         # This is an ugly hack for handling the whole graph when expanding to
         # the beam. We need to access all the inner states of the network in
         # the graph, replace them with beam-size-times copied originals, create
         # the beam search graph, and then replace the inner states back.
-        self._building = False

         enc_states = self.parent_decoder.encoder_states
         enc_masks = self.parent_decoder.encoder_masks
@@ -175,13 +181,21 @@

         # Create the beam search symbolic graph.
         with self.use_scope():
-            self.initial_loop_state = self.get_initial_loop_state()
-            self.outputs = self.decoding_loop()
+            self._initial_loop_state = self.get_initial_loop_state()
+            outputs = self.decoding_loop()

         # Reassign the original encoder states and mask back
         setattr(self.parent_decoder, "encoder_states", enc_states)
         setattr(self.parent_decoder, "encoder_masks", enc_masks)

+        return outputs
+
+    @property
+    def initial_loop_state(self) -> BeamSearchLoopState:
+        if self._initial_loop_state is None:
+            raise RuntimeError("Initial loop state was not initialized")
+        return self._initial_loop_state
+
     @property
     def vocabulary(self) -> Vocabulary:
         return self.parent_decoder.vocabulary
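The beam search graph is now constructed lazily inside the outputs property, and initial_loop_state becomes valid only as a side effect of that build, hence the guard that raises RuntimeError on early access. A generic sketch of this guarded-accessor pattern (names are illustrative, not the decoder's API):

    from typing import List, Optional

    class LazyBuilder:
        """Toy example: a by-product is filled while building the main
        output and guarded against access before the build has run."""

        def __init__(self) -> None:
            self._state = None    # type: Optional[List[int]]
            self._outputs = None  # type: Optional[List[int]]

        @property
        def outputs(self) -> List[int]:
            if self._outputs is None:
                self._state = [0]          # side effect of the build
                self._outputs = [1, 2, 3]  # the actual result
            return self._outputs

        @property
        def state(self) -> List[int]:
            if self._state is None:
                raise RuntimeError("state not built yet; access outputs first")
            return self._state

    builder = LazyBuilder()
    _ = builder.outputs   # triggers the build
    print(builder.state)  # safe to read now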
