Adding explicit channels and shape parameters to image and numpy readers

jindrahelcl · jindrahelcl · commit 2adfeb41b825 · 2019-01-06T13:16:59.000+01:00
diff --git a/neuralmonkey/readers/image_reader.py b/neuralmonkey/readers/image_reader.py
@@ -1,13 +1,19 @@
 from typing import Callable, Iterable, List
 import os
-from typeguard import check_argument_types
+
 import numpy as np
+from typeguard import check_argument_types
 from PIL import Image, ImageFile
+
+from neuralmonkey.logging import warn
+
+
 ImageFile.LOAD_TRUNCATED_IMAGES = True
 
 
 def image_reader(pad_w: int,
                  pad_h: int,
+                 channels: int = 3,
                  prefix: str = "",
                  rescale_w: bool = False,
                  rescale_h: bool = False,
@@ -17,7 +23,8 @@ def image_reader(pad_w: int,
 
     Args:
         pad_w: Width to which the images will be padded/cropped/resized.
-        pad_h: Height to with the images will be padded/corpped/resized.
+        pad_h: Height to which the images will be padded/cropped/resized.
+        channels: Number of channels in each image (default 3 for RGB).
         prefix: Prefix of the paths that are listed in a image files.
         rescale_w: If true, image is rescaled to have given width. It is
             cropped/padded otherwise.
@@ -57,6 +64,8 @@ def load(list_files: List[str]) -> Iterable[np.ndarray]:
                     try:
                         image = Image.open(path).convert(mode)
                     except IOError:
+                        warn("Skipping image from file '{}' no. '{}'.".format(
+                            path, i + 1))
                         image = Image.new(mode, (pad_w, pad_h))
 
                     image = _rescale_or_crop(image, pad_w, pad_h,
@@ -65,16 +74,22 @@ def load(list_files: List[str]) -> Iterable[np.ndarray]:
                     image_np = np.array(image)
 
                     if len(image_np.shape) == 2:
-                        channels = 1
+                        img_channels = 1
                         image_np = np.expand_dims(image_np, 2)
                     elif len(image_np.shape) == 3:
-                        channels = image_np.shape[2]
+                        img_channels = image_np.shape[2]
                     else:
                         raise ValueError(
                             ("Image should have either 2 (black and white) "
                              "or three dimensions (color channels), has {} "
                              "dimension.").format(len(image_np.shape)))
 
+                    if channels != img_channels:
+                        raise ValueError(
+                            "Image does not have the pre-declared number of "
+                            "channels {}, but {}.".format(
+                                channels, img_channels))
+
                     yield _pad(image_np, pad_w, pad_h, channels)
 
     return load
diff --git a/neuralmonkey/readers/numpy_reader.py b/neuralmonkey/readers/numpy_reader.py
@@ -15,12 +15,14 @@ def single_tensor(files: List[str]) -> np.ndarray:
 
 
 def from_file_list(prefix: str,
+                   shape: List[int],
                    suffix: str = "",
                    default_tensor_name: str = "arr_0") -> Callable:
     """Load a list of numpy arrays from a list of .npz numpy files.
 
     Args:
         prefix: A common prefix for the files in the list.
+        shape: The shape of the numpy arrays stored in the referenced files.
         suffix: An optional suffix that will be appended to each path
         default_tensor_name: Key of the tensors to load from the npz files.
 
@@ -35,10 +37,11 @@ def load(files: List[str]) -> Iterable[np.ndarray]:
                 for line in f_list:
                     path = os.path.join(prefix, line.rstrip()) + suffix
                     with np.load(path) as npz:
-                        yield npz[default_tensor_name]
-
+                        arr = npz[default_tensor_name]
+                        arr_shape = list(arr.shape)
+                        if arr_shape != shape:
+                            raise ValueError(
+                                "Shapes do not match: expected {}, found {}"
+                                .format(shape, arr_shape))
+                        yield arr
     return load
-
-
-# pylint: disable=invalid-name
-numpy_file_list_reader = from_file_list(prefix="")
diff --git a/tests/flat-multiattention.ini b/tests/flat-multiattention.ini
@@ -24,7 +24,7 @@ num_sessions=1
 [numpy_reader]
 class=readers.numpy_reader.from_file_list
 prefix="tests/data/flickr30k"
-# shape=[8, 8, 2048]
+shape=[8, 8, 2048]
 
 [train_data]
 class=dataset.load
diff --git a/tests/hier-multiattention.ini b/tests/hier-multiattention.ini
@@ -32,6 +32,7 @@ prefix="tests/data/flickr30k"
 pad_h=32
 pad_w=32
 mode="RGB"
+channels=3
 
 [train_data]
 class=dataset.load
diff --git a/tests/str.ini b/tests/str.ini
@@ -29,6 +29,7 @@ pad_w=256
 rescale_w=True
 rescale_h=True
 mode="F"
+channels=1
 
 [train_data]
 class=dataset.load