Skip to content

Commit 669f0b5

Browse files
Merge pull request #1326 from rickwierenga:master
PiperOrigin-RevId: 292867212
2 parents e1c9535 + cf8fe68 commit 669f0b5

File tree

31 files changed

+224
-2
lines changed

31 files changed

+224
-2
lines changed

docs/release_notes.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@
1818
* Add e-SNLI dataset from the paper
1919
[e-SNLI](http://papers.nips.cc/paper/8163-e-snli-natural-language-inference-with-natural-language-explanations.pdf).
2020
* Add SCAN dataset introduced [here](https://arxiv.org/pdf/1711.00350.pdf).
21+
* Add [Imagewang](https://github.com/fastai/imagenette) dataset.
2122
* Add DIV2K dataset from the paper
2223
[DIV2K](http://www.vision.ee.ethz.ch/~timofter/publications/Agustsson-CVPRW-2017.pdf)
2324
* Add CFQ (Compositional Freebase Questions) dataset from
2425
[this paper](https://openreview.net/pdf?id=SygcCnNKwr).
25-
26-

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
'image/imagenet2012_labels.txt',
9191
'image/imagenet2012_validation_labels.txt',
9292
'image/imagenette_labels.txt',
93+
'image/imagewang_labels.txt',
9394
'image/inaturalist_labels.txt',
9495
'image/inaturalist_supercategories.txt',
9596
'image/open_images_classes_all.txt',

tensorflow_datasets/image/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
from tensorflow_datasets.image.imagenet2012_corrupted import Imagenet2012Corrupted
6262
from tensorflow_datasets.image.imagenet_resized import ImagenetResized
6363
from tensorflow_datasets.image.imagenette import Imagenette
64+
from tensorflow_datasets.image.imagewang import Imagewang
6465
from tensorflow_datasets.image.inaturalist import INaturalist2017
6566
from tensorflow_datasets.image.lfw import LFW
6667
from tensorflow_datasets.image.lost_and_found import LostAndFound
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# coding=utf-8
2+
# Copyright 2020 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# -*- coding: utf-8 -*-
17+
"""Imagewang contains Imagenette and Imagewoof combined."""
18+
19+
from __future__ import absolute_import
20+
from __future__ import division
21+
from __future__ import print_function
22+
23+
import os
24+
25+
import tensorflow.compat.v2 as tf
26+
import tensorflow_datasets.public_api as tfds
27+
28+
# BibTeX entry surfaced through `DatasetInfo.citation`.
_CITATION = """
@misc{imagewang,
author = "Jeremy Howard",
title = "Imagewang",
url = "https://github.com/fastai/imagenette/"
}
"""

# Long description attached to the dataset. The final sentence carries a
# `{size}` placeholder naming the size variant of the selected config.
_DESCRIPTION = """\
Imagewang contains Imagenette and Imagewoof combined
Image网 (pronounced "Imagewang"; 网 means "net" in Chinese) contains Imagenette
and Imagewoof combined, but with some twists that make it into a tricky
semi-supervised unbalanced classification problem:

* The validation set is the same as Imagewoof (i.e. 30% of Imagewoof images);
there are no Imagenette images in the validation set (they're all in the
training set)
* Only 10% of Imagewoof images are in the training set!
* The remaining are in the unsup ("unsupervised") directory, and you can not
use their labels in training!
* It's even hard to type and hard to say!

The dataset comes in three variants:
* Full size
* 320 px
* 160 px
This dataset consists of the Imagewang dataset {size} variant.
"""

# One-line description attached to every builder config.
_DESCRIPTION_SHORT = """\
Imagewang contains Imagenette and Imagewoof combined.
"""

# Label names file, resolved relative to the tensorflow_datasets package.
_LABELS_FNAME = "image/imagewang_labels.txt"
# Base URL under which the per-size `.tgz` archives are hosted.
_URL_PREFIX = "https://s3.amazonaws.com/fast-ai-imageclas"
# Supported size variants; each one becomes a builder config name.
_SIZES = ["full-size", "320px", "160px"]

# Top-level directory found inside the extracted archive for each size.
_SIZE_TO_DIRNAME = {
    "full-size": "imagewang",
    "320px": "imagewang-320",
    "160px": "imagewang-160",
}
70+
71+
72+
class ImagewangConfig(tfds.core.BuilderConfig):
  """Builder configuration selecting one Imagewang size variant."""

  def __init__(self, size, **kwargs):
    """Creates the config.

    Args:
      size: size variant this config stands for; stored as `self.size`.
      **kwargs: forwarded unchanged to `tfds.core.BuilderConfig`.
    """
    # Every size variant shares the same dataset version.
    config_version = tfds.core.Version("2.0.0")
    super(ImagewangConfig, self).__init__(version=config_version, **kwargs)
    self.size = size
79+
80+
81+
def _make_builder_configs():
  """Returns one `ImagewangConfig` per entry of `_SIZES`, in the same order.

  Each config is named after its size and carries the short description.
  """
  return [
      ImagewangConfig(name=size, size=size, description=_DESCRIPTION_SHORT)
      for size in _SIZES
  ]
87+
88+
89+
class Imagewang(tfds.core.GeneratorBasedBuilder):
  """Imagewang contains Imagenette and Imagewoof combined.

  One builder config is registered per size variant listed in `_SIZES`.
  """

  BUILDER_CONFIGS = _make_builder_configs()

  def _info(self):
    """Returns the `DatasetInfo`: image/label features, homepage, citation."""
    names_file = tfds.core.get_tfds_path(_LABELS_FNAME)
    # Substitute the `{size}` placeholder so the published description names
    # the selected variant instead of showing a raw template token.
    # `str.replace` (not `str.format`) so any other literal braces in the
    # description text can never raise.
    description = _DESCRIPTION.replace("{size}", self.builder_config.size)
    return tfds.core.DatasetInfo(
        builder=self,
        description=description,
        features=tfds.features.FeaturesDict({
            "image": tfds.features.Image(),
            "label": tfds.features.ClassLabel(names_file=names_file),
        }),
        supervised_keys=("image", "label"),
        homepage="https://github.com/fastai/imagenette",
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Downloads the archive for the configured size and defines the splits.

    Args:
      dl_manager: object whose `download_and_extract(url)` fetches and unpacks
        the archive and returns the extraction path.

    Returns:
      A list with the TRAIN and VALIDATION `SplitGenerator`s, reading from the
      archive's "train" and "val" directories respectively.

    Raises:
      ValueError: if the configured size is not one of `_SIZES`.
    """
    size = self.builder_config.size
    if size not in _SIZES:
      raise ValueError("size must be one of %s" % _SIZES)

    # Archive names are "imagewang.tgz", "imagewang-320.tgz" and
    # "imagewang-160.tgz"; `size[:-2]` drops the trailing "px".
    size_str = "" if size == "full-size" else "-" + size[:-2]
    # URLs always use "/" as separator; `os.path.join` would insert "\" on
    # Windows and produce an invalid URL, so build the string explicitly.
    url = "%s/imagewang%s.tgz" % (_URL_PREFIX, size_str)
    path = dl_manager.download_and_extract(url)

    # Local filesystem paths, so `os.path.join` is appropriate here.
    root_dir = os.path.join(path, _SIZE_TO_DIRNAME[size])
    train_path = os.path.join(root_dir, "train")
    val_path = os.path.join(root_dir, "val")

    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            gen_kwargs={
                "datapath": train_path,
            },
        ),
        tfds.core.SplitGenerator(
            name=tfds.Split.VALIDATION,
            gen_kwargs={
                "datapath": val_path,
            },
        ),
    ]

  def _generate_examples(self, datapath):
    """Yields one example per `*.JPEG` file found under `datapath`.

    Args:
      datapath: directory whose entries are used as label names; each entry
        is searched for `*.JPEG` files.

    Yields:
      `(key, example)` tuples where the key is the image file name and the
      example holds the image path and its label.
    """
    for label in tf.io.gfile.listdir(datapath):
      for fpath in tf.io.gfile.glob(os.path.join(datapath, label, "*.JPEG")):
        fname = os.path.basename(fpath)
        record = {
            "image": fpath,
            "label": label,
        }
        yield fname, record
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
n03394916
2+
n03417042
3+
n02089973
4+
n02086240
5+
n02105641
6+
n02087394
7+
n02099601
8+
n02115641
9+
n03445777
10+
n02102040
11+
n03425413
12+
n03888257
13+
n03028079
14+
n03000684
15+
n01440764
16+
n02093754
17+
n02111889
18+
n02096294
19+
n02088364
20+
n02979186
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# coding=utf-8
2+
# Copyright 2020 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Tests for Imagewang."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
from tensorflow_datasets import testing
23+
from tensorflow_datasets.image import imagewang
24+
25+
26+
class ImagewangFullSizeTest(testing.DatasetBuilderTestCase):
  """Builder test case for the "full-size" Imagewang config."""

  DATASET_CLASS = imagewang.Imagewang
  BUILDER_CONFIG_NAMES_TO_TEST = ["full-size"]
  # Expected number of examples per split.
  SPLITS = {"train": 4, "validation": 4}
33+
34+
35+
class Imagewang320Test(testing.DatasetBuilderTestCase):
  """Builder test case for the "320px" Imagewang config."""

  DATASET_CLASS = imagewang.Imagewang
  BUILDER_CONFIG_NAMES_TO_TEST = ["320px"]
  # Expected number of examples per split.
  SPLITS = {"train": 4, "validation": 4}
42+
43+
44+
class Imagewang160Test(testing.DatasetBuilderTestCase):
  """Builder test case for the "160px" Imagewang config."""

  DATASET_CLASS = imagewang.Imagewang
  BUILDER_CONFIG_NAMES_TO_TEST = ["160px"]
  # Expected number of examples per split.
  SPLITS = {"train": 4, "validation": 4}
51+
52+
53+
# Hand control to the test runner only when this file is executed directly.
if __name__ == "__main__":
  testing.test_main()

0 commit comments

Comments
 (0)