Skip to content

Commit c138fef

Browse files
jpuigcervercopybara-github
authored andcommitted
Add PASCAL VOC 2007 dataset to Tensorflow Datasets.
PiperOrigin-RevId: 240320453
1 parent 61e04ca commit c138fef

File tree

20 files changed

+519
-0
lines changed

20 files changed

+519
-0
lines changed

tensorflow_datasets/image/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,4 @@
4141
from tensorflow_datasets.image.quickdraw import QuickdrawBitmap
4242
from tensorflow_datasets.image.rock_paper_scissors import RockPaperScissors
4343
from tensorflow_datasets.image.svhn import SvhnCropped
44+
from tensorflow_datasets.image.voc import Voc2007

tensorflow_datasets/image/voc.py

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""PASCAL VOC datasets."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
import os
23+
import xml.etree.ElementTree
24+
25+
import tensorflow as tf
26+
import tensorflow_datasets.public_api as tfds
27+
28+
29+
# BibTeX entry for the dataset; passed as `citation` to `DatasetInfo`.
_VOC2007_CITATION = """\
@misc{pascal-voc-2007,
author = "Everingham, M. and Van~Gool, L. and Williams, C. K. I. and Winn, J. and Zisserman, A.",
title = "The {PASCAL} {V}isual {O}bject {C}lasses {C}hallenge 2007 {(VOC2007)} {R}esults",
howpublished = "http://www.pascal-network.org/challenges/VOC/voc2007/workshop/index.html"}
"""
# Human-readable summary; passed as `description` to `DatasetInfo`.
_VOC2007_DESCRIPTION = """\
This dataset contains the data from the PASCAL Visual Object Classes Challenge
2007, a.k.a. VOC2007, corresponding to the Classification and Detection
competitions.
A total of 9,963 images are included in this dataset, where each image contains
a set of objects, out of 20 different classes, making a total of 24,640
annotated objects.
In the Classification competition, the goal is to predict the set of labels
contained in the image, while in the Detection competition the goal is to
predict the bounding box and label of each individual object.
"""
# Dataset homepage; listed in the `urls` field of `DatasetInfo`.
_VOC2007_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/"
# Original site; it is down very often, so it is kept here only for reference.
# _VOC2007_DATA_URL = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/"
# Data mirror: base URL the archive file names are appended to when the
# splits are downloaded.
_VOC2007_DATA_URL = "http://pjreddie.com/media/files/"
# The 20 object class names, used as the `ClassLabel` vocabulary for the
# per-object "label" feature and the image-level "labels" features.
_VOC2007_LABELS = (
    "aeroplane",
    "bicycle",
    "bird",
    "boat",
    "bottle",
    "bus",
    "car",
    "cat",
    "chair",
    "cow",
    "diningtable",
    "dog",
    "horse",
    "motorbike",
    "person",
    "pottedplant",
    "sheep",
    "sofa",
    "train",
    "tvmonitor",
)
# Object pose names, used as the `ClassLabel` vocabulary for the per-object
# "pose" feature. Annotation pose text is lower-cased before being looked up.
_VOC2007_POSES = (
    "frontal",
    "rear",
    "left",
    "right",
    "unspecified",
)
80+
81+
82+
class Voc2007(tfds.core.GeneratorBasedBuilder):
  """Pascal VOC 2007 (classification and detection annotations).

  Each example contains the image, its file name, the list of annotated
  objects (label, normalized bounding box, pose, truncated/difficult flags)
  and two image-level label lists derived from those objects.
  """

  VERSION = tfds.core.Version("1.0.0")

  def _info(self):
    """Returns the `tfds.core.DatasetInfo` describing the example features."""
    return tfds.core.DatasetInfo(
        builder=self,
        description=_VOC2007_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "image": tfds.features.Image(),
            "image/filename": tfds.features.Text(),
            "objects": tfds.features.SequenceDict({
                "label": tfds.features.ClassLabel(names=_VOC2007_LABELS),
                "bbox": tfds.features.BBoxFeature(),
                "pose": tfds.features.ClassLabel(names=_VOC2007_POSES),
                "is_truncated": tf.bool,
                "is_difficult": tf.bool,
            }),
            "labels": tfds.features.Sequence(
                tfds.features.ClassLabel(names=_VOC2007_LABELS)),
            "labels_no_difficult": tfds.features.Sequence(
                tfds.features.ClassLabel(names=_VOC2007_LABELS)),
        }),
        urls=[_VOC2007_URL],
        citation=_VOC2007_CITATION)

  def _split_generators(self, dl_manager):
    """Downloads both archives and returns the test/train/validation splits.

    Args:
      dl_manager: download manager used to fetch and extract the tar archives.

    Returns:
      A list with one `tfds.core.SplitGenerator` per split. The train and
      validation splits both read from the extracted "trainval" archive.
    """
    # URLs always use "/" as separator. `os.path.join` uses the OS path
    # separator instead (a backslash on Windows), so build the URLs explicitly.
    base_url = _VOC2007_DATA_URL.rstrip("/")
    trainval_path = dl_manager.download_and_extract(
        "{}/{}".format(base_url, "VOCtrainval_06-Nov-2007.tar"))
    test_path = dl_manager.download_and_extract(
        "{}/{}".format(base_url, "VOCtest_06-Nov-2007.tar"))
    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TEST,
            num_shards=1,
            gen_kwargs=dict(data_path=test_path, set_name="test")),
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            num_shards=1,
            gen_kwargs=dict(data_path=trainval_path, set_name="train")),
        tfds.core.SplitGenerator(
            name=tfds.Split.VALIDATION,
            num_shards=1,
            gen_kwargs=dict(data_path=trainval_path, set_name="val")),
    ]

  def _generate_examples(self, data_path, set_name):
    """Yields one example per image id listed in the image-set file.

    Args:
      data_path: directory the archive was extracted to.
      set_name: name of the image set ("train", "val" or "test"); selects
        `VOCdevkit/VOC2007/ImageSets/Main/<set_name>.txt`.

    Yields:
      Example dicts as built by `_generate_example`.
    """
    set_filepath = os.path.join(
        data_path, "VOCdevkit/VOC2007/ImageSets/Main/{}.txt".format(set_name))
    with tf.io.gfile.GFile(set_filepath, "r") as f:
      for line in f:
        image_id = line.strip()
        # A blank (e.g. trailing) line would otherwise produce an empty id and
        # a lookup of ".jpg" / ".xml".
        if not image_id:
          continue
        yield self._generate_example(data_path, image_id)

  def _generate_example(self, data_path, image_id):
    """Builds the example dict for a single image.

    Args:
      data_path: directory the archive was extracted to.
      image_id: image identifier, i.e. the file name without extension.

    Returns:
      A dict with the image path, the image file name, the list of annotated
      objects, and the sorted unique labels of all objects ("labels") and of
      the objects not flagged as difficult ("labels_no_difficult").
    """
    image_filepath = os.path.join(
        data_path, "VOCdevkit/VOC2007/JPEGImages", "{}.jpg".format(image_id))
    annon_filepath = os.path.join(
        data_path, "VOCdevkit/VOC2007/Annotations", "{}.xml".format(image_id))

    def _get_example_objects():
      """Function to get all the objects from the annotation XML file."""
      with tf.io.gfile.GFile(annon_filepath, "r") as f:
        root = xml.etree.ElementTree.parse(f).getroot()

      # Image size, used to scale pixel coordinates by width/height.
      size = root.find("size")
      width = float(size.find("width").text)
      height = float(size.find("height").text)

      for obj in root.findall("object"):
        # Get object's label name.
        label = obj.find("name").text.lower()
        # Get objects' pose name.
        pose = obj.find("pose").text.lower()
        is_truncated = (obj.find("truncated").text == "1")
        is_difficult = (obj.find("difficult").text == "1")
        bndbox = obj.find("bndbox")
        xmax = float(bndbox.find("xmax").text)
        xmin = float(bndbox.find("xmin").text)
        ymax = float(bndbox.find("ymax").text)
        ymin = float(bndbox.find("ymin").text)
        yield {
            "label": label,
            "pose": pose,
            "bbox": tfds.features.BBox(
                ymin / height, xmin / width, ymax / height, xmax / width),
            "is_truncated": is_truncated,
            "is_difficult": is_difficult,
        }

    objects = list(_get_example_objects())
    # Image-level labels: unique object labels, in sorted order.
    labels = sorted({obj["label"] for obj in objects})
    labels_no_difficult = sorted(
        {obj["label"] for obj in objects if not obj["is_difficult"]})
    return {
        "image": image_filepath,
        "image/filename": image_id + ".jpg",
        "objects": objects,
        "labels": labels,
        "labels_no_difficult": labels_no_difficult,
    }

tensorflow_datasets/image/voc_test.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Tests for PASCAL VOC image data loading."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
from tensorflow_datasets import testing
23+
from tensorflow_datasets.image import voc
24+
25+
26+
class Voc2007Test(testing.DatasetBuilderTestCase):
  """Runs the shared `DatasetBuilderTestCase` checks against `voc.Voc2007`."""
  # Builder class under test.
  DATASET_CLASS = voc.Voc2007
  # One integer per split name. NOTE(review): presumably the number of
  # examples each split of the fake test data is expected to yield -- confirm
  # against `testing.DatasetBuilderTestCase`.
  SPLITS = {
      'train': 1,
      'validation': 2,
      'test': 3,
  }
33+
34+
# Call the shared test entry point when this file is executed as a script.
if __name__ == '__main__':
  testing.test_main()
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<annotation>
2+
<size>
3+
<width>15</width>
4+
<height>13</height>
5+
</size>
6+
<object>
7+
<name>person</name>
8+
<pose>left</pose>
9+
<truncated>0</truncated>
10+
<difficult>0</difficult>
11+
<bndbox>
12+
<xmin>10</xmin>
13+
<ymin>6</ymin>
14+
<xmax>13</xmax>
15+
<ymax>11</ymax>
16+
</bndbox>
17+
</object>
18+
</annotation>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<annotation>
2+
<size>
3+
<width>10</width>
4+
<height>14</height>
5+
</size>
6+
<object>
7+
<name>train</name>
8+
<pose>left</pose>
9+
<truncated>0</truncated>
10+
<difficult>0</difficult>
11+
<bndbox>
12+
<xmin>0</xmin>
13+
<ymin>3</ymin>
14+
<xmax>5</xmax>
15+
<ymax>9</ymax>
16+
</bndbox>
17+
</object>
18+
</annotation>
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
<annotation>
2+
<size>
3+
<width>15</width>
4+
<height>11</height>
5+
</size>
6+
<object>
7+
<name>train</name>
8+
<pose>frontal</pose>
9+
<truncated>1</truncated>
10+
<difficult>0</difficult>
11+
<bndbox>
12+
<xmin>2</xmin>
13+
<ymin>3</ymin>
14+
<xmax>6</xmax>
15+
<ymax>9</ymax>
16+
</bndbox>
17+
</object>
18+
</annotation>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<annotation>
2+
<size>
3+
<width>13</width>
4+
<height>10</height>
5+
</size>
6+
<object>
7+
<name>cat</name>
8+
<pose>frontal</pose>
9+
<truncated>1</truncated>
10+
<difficult>0</difficult>
11+
<bndbox>
12+
<xmin>1</xmin>
13+
<ymin>1</ymin>
14+
<xmax>7</xmax>
15+
<ymax>5</ymax>
16+
</bndbox>
17+
</object>
18+
<object>
19+
<name>cow</name>
20+
<pose>right</pose>
21+
<truncated>0</truncated>
22+
<difficult>1</difficult>
23+
<bndbox>
24+
<xmin>2</xmin>
25+
<ymin>0</ymin>
26+
<xmax>8</xmax>
27+
<ymax>7</ymax>
28+
</bndbox>
29+
</object>
30+
</annotation>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<annotation>
2+
<size>
3+
<width>11</width>
4+
<height>13</height>
5+
</size>
6+
<object>
7+
<name>train</name>
8+
<pose>rear</pose>
9+
<truncated>0</truncated>
10+
<difficult>0</difficult>
11+
<bndbox>
12+
<xmin>0</xmin>
13+
<ymin>6</ymin>
14+
<xmax>6</xmax>
15+
<ymax>9</ymax>
16+
</bndbox>
17+
</object>
18+
<object>
19+
<name>sofa</name>
20+
<pose>rear</pose>
21+
<truncated>0</truncated>
22+
<difficult>0</difficult>
23+
<bndbox>
24+
<xmin>1</xmin>
25+
<ymin>6</ymin>
26+
<xmax>7</xmax>
27+
<ymax>10</ymax>
28+
</bndbox>
29+
</object>
30+
</annotation>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
<annotation>
2+
<size>
3+
<width>11</width>
4+
<height>14</height>
5+
</size>
6+
<object>
7+
<name>tvmonitor</name>
8+
<pose>right</pose>
9+
<truncated>1</truncated>
10+
<difficult>0</difficult>
11+
<bndbox>
12+
<xmin>2</xmin>
13+
<ymin>5</ymin>
14+
<xmax>7</xmax>
15+
<ymax>10</ymax>
16+
</bndbox>
17+
</object>
18+
<object>
19+
<name>train</name>
20+
<pose>unspecified</pose>
21+
<truncated>0</truncated>
22+
<difficult>0</difficult>
23+
<bndbox>
24+
<xmin>6</xmin>
25+
<ymin>1</ymin>
26+
<xmax>10</xmax>
27+
<ymax>7</ymax>
28+
</bndbox>
29+
</object>
30+
</annotation>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
000003
2+
000004
3+
000005

0 commit comments

Comments
 (0)