Skip to content

Commit 16c9bb3

Browse files
TensorFlow Datasets Team and copybara-github
authored and committed
Add beans dataset to tfds.
PiperOrigin-RevId: 292305284
1 parent 077788c commit 16c9bb3

File tree

8 files changed

+218
-0
lines changed

8 files changed

+218
-0
lines changed

tensorflow_datasets/image/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from tensorflow_datasets.image.abstract_reasoning import AbstractReasoning
1919
from tensorflow_datasets.image.aflw2k3d import Aflw2k3d
2020
from tensorflow_datasets.image.arc import ARC
21+
from tensorflow_datasets.image.beans import Beans
2122
from tensorflow_datasets.image.bigearthnet import Bigearthnet
2223
from tensorflow_datasets.image.binarized_mnist import BinarizedMNIST
2324
from tensorflow_datasets.image.binary_alpha_digits import BinaryAlphaDigits

tensorflow_datasets/image/beans.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
# coding=utf-8
2+
# Copyright 2020 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Beans leaf dataset with images of diseased and healthy leaves."""
17+
18+
import tensorflow_datasets.public_api as tfds
19+
20+
_CITATION = """\
21+
@ONLINE {beansdata,
22+
author="Makerere AI Lab",
23+
title="Bean disease dataset",
24+
month="January",
25+
year="2020",
26+
url="https://github.com/AI-Lab-Makerere/ibean/"
27+
}
28+
"""
29+
30+
_DESCRIPTION = """\
31+
Beans is a dataset of images of beans taken in the field using smartphone
32+
cameras. It consists of 3 classes: 2 disease classes and the healthy class.
33+
Diseases depicted include Angular Leaf Spot and Bean Rust. Data was annotated
34+
by experts from the National Crops Resources Research Institute (NaCRRI) in
35+
Uganda and collected by the Makerere AI research lab.
36+
"""
37+
38+
_TRAIN_URL = "https://storage.googleapis.com/ibeans/train.zip"
39+
_VALIDATION_URL = "https://storage.googleapis.com/ibeans/validation.zip"
40+
_TEST_URL = "https://storage.googleapis.com/ibeans/test.zip"
41+
42+
_IMAGE_SIZE = 500
43+
_IMAGE_SHAPE = (_IMAGE_SIZE, _IMAGE_SIZE, 3)
44+
45+
_LABELS = ["angular_leaf_spot", "bean_rust", "healthy"]
46+
47+
48+
class Beans(tfds.core.GeneratorBasedBuilder):
  """Beans plant leaf images dataset.

  Builds three splits (train, validation and test), each downloaded as its own
  zip archive. Every example pairs an image with a class label taken from the
  name of the directory that contains the image inside the archive.
  """

  VERSION = tfds.core.Version("0.1.0")

  def _info(self):
    """Returns the `tfds.core.DatasetInfo` with features and metadata."""
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "image": tfds.features.Image(shape=_IMAGE_SHAPE),
            "label": tfds.features.ClassLabel(names=_LABELS),
        }),
        supervised_keys=("image", "label"),
        homepage="https://github.com/AI-Lab-Makerere/ibean/",
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Downloads the three archives and returns one generator per split.

    Args:
      dl_manager: download manager used to fetch the archives and to iterate
        over their members.

    Returns:
      A list of `tfds.core.SplitGenerator`, in train, validation, test order.
    """
    train_path, val_path, test_path = dl_manager.download(
        [_TRAIN_URL, _VALIDATION_URL, _TEST_URL])

    splits = (
        (tfds.Split.TRAIN, train_path),
        (tfds.Split.VALIDATION, val_path),
        (tfds.Split.TEST, test_path),
    )
    return [
        tfds.core.SplitGenerator(
            name=split_name,
            gen_kwargs={"archive": dl_manager.iter_archive(path)},
        )
        for split_name, path in splits
    ]

  def _generate_examples(self, archive):
    """Yields one `(key, example)` pair per `.jpg` member of the archive.

    Args:
      archive: iterable of `(member path, file object)` pairs.

    Yields:
      The member path as key and a dict with the file object under "image"
      and the parent directory name under "label".

    Raises:
      IndexError: if a `.jpg` member has no parent directory in its path.
    """
    for fname, fobj in archive:
      if not fname.endswith(".jpg"):
        continue
      # The label is the directory holding the image. Member paths may come
      # back with OS-native separators (backslashes on Windows), so normalise
      # to "/" before splitting instead of assuming POSIX-style paths.
      label = fname.replace("\\", "/").split("/")[-2]
      yield fname, {"image": fobj, "label": label}
99+
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# coding=utf-8
2+
# Copyright 2020 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Tests for Beans dataset."""
17+
18+
from tensorflow_datasets.image import beans
19+
import tensorflow_datasets.testing as tfds_test
20+
21+
# The builder declares a fixed 500x500 image shape, while the fake-data script
# for this dataset writes 5x5 images; override the shape so any height and
# width is accepted (presumably so the fake examples pass the feature check).
beans._IMAGE_SHAPE = (None, None, 3) # pylint: disable=protected-access
22+
23+
24+
class BeansTest(tfds_test.DatasetBuilderTestCase):
  """Dataset-builder test case for `beans.Beans` using fake archives."""

  DATASET_CLASS = beans.Beans

  # Expected example count per split.
  SPLITS = {
      'train': 3,
      'test': 3,
      'validation': 3,
  }

  # Fake archive file names, listed in train, validation, test order.
  DL_EXTRACT_RESULT = [
      'beans_train.zip',
      'beans_validation.zip',
      'beans_test.zip',
  ]
35+
36+
37+
class BeansS3Test(BeansTest):
  """Same checks as `BeansTest`, with `VERSION` set to experimental_latest."""

  VERSION = 'experimental_latest'
39+
40+
41+
if __name__ == '__main__':
42+
tfds_test.test_main()
43+

tensorflow_datasets/testing/beans.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# coding=utf-8
2+
# Copyright 2020 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
r"""Generate fake data for beans dataset.
17+
18+
"""
19+
20+
# from __future__ import absolute_import
21+
# from __future__ import division
22+
# from __future__ import print_function
23+
24+
import os
25+
import zipfile
26+
27+
from absl import app
28+
from absl import flags
29+
30+
import tensorflow.compat.v2 as tf
31+
from tensorflow_datasets.core import utils
32+
from tensorflow_datasets.core.utils import py_utils
33+
from tensorflow_datasets.testing import fake_data_utils
34+
35+
flags.DEFINE_string('tfds_dir', py_utils.tfds_dir(),
36+
'Path to tensorflow_datasets directory')
37+
38+
FLAGS = flags.FLAGS
39+
40+
41+
def _output_dir():
  """Returns the beans fake-examples directory under the `tfds_dir` flag."""
  sub_dirs = ('testing', 'test_data', 'fake_examples', 'beans')
  return os.path.join(FLAGS.tfds_dir, *sub_dirs)
44+
45+
46+
def _get_jpeg(height, width):
  """Encodes a random picture of the given size as JPEG and returns it.

  Args:
    height: height passed to `fake_data_utils.get_random_picture`.
    width: width passed to `fake_data_utils.get_random_picture`.

  Returns:
    The value produced by running the JPEG-encode op in a no-GPU session.
  """
  encoded = tf.image.encode_jpeg(
      fake_data_utils.get_random_picture(height, width))
  with utils.nogpu_session() as session:
    return session.run(encoded)
53+
54+
55+
def create_zip(fname):
  """Writes a fake zip named `fname` holding one image per class directory.

  Args:
    fname: file name of the archive to create inside the output directory.
  """
  members = (
      'angular_leaf_spot/0.jpg',
      'bean_rust/0.jpg',
      'healthy/0.jpg',
  )
  zip_path = os.path.join(_output_dir(), fname)
  fake_image = _get_jpeg(height=5, width=5)
  with zipfile.ZipFile(zip_path, 'w') as archive:
    # The same encoded image is reused for every class.
    for member in members:
      archive.writestr(member, fake_image)
62+
63+
64+
def main(argv):
  """Creates the fake train, validation and test archives, in that order.

  Args:
    argv: positional command-line arguments; unused.
  """
  del argv  # Unused.
  archive_names = (
      'beans_train.zip',
      'beans_validation.zip',
      'beans_test.zip',
  )
  for archive_name in archive_names:
    create_zip(archive_name)
69+
70+
71+
if __name__ == '__main__':
72+
app.run(main)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
https://storage.googleapis.com/ibeans/test.zip 17708541 ca67b15d960d1e2fd9d23fd8498ce86818ead90755c630a43baf19ec4af09312
2+
https://storage.googleapis.com/ibeans/train.zip 143812152 284fe8456ce20687f4367ae7ad94a64577e7f9fde2c2c6b1c74340ab5dc82715
3+
https://storage.googleapis.com/ibeans/validation.zip 18504213 90b7aa1c26d91d9afff07a30bbc67a5ea34f1f1397f068d8675be09d7d0c602d

0 commit comments

Comments
 (0)