Skip to content

Commit 37eb94b

Browse files
Merge of e41e984
PiperOrigin-RevId: 257244747
2 parents d992821 + e41e984 commit 37eb94b

File tree

13 files changed

+214
-1
lines changed

13 files changed

+214
-1
lines changed

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
],
113113
'groove': ['pretty_midi', 'pydub'],
114114
'librispeech': ['pydub'], # and ffmpeg installed
115+
'pet_finder': ['pandas'],
115116
'svhn': ['scipy'],
116117
'wikipedia': ['mwparserfromhell', 'apache_beam'],
117118
}

tensorflow_datasets/core/lazy_imports.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,12 @@ def mwparserfromhell(cls):
7171

7272
@utils.classproperty
7373
@classmethod
74-
def PIL_Image(cls): # pylint: disable=invalid-name
74+
def pandas(cls):
  """Returns the result of `_try_import("pandas")` (presumably the module)."""
  module = _try_import("pandas")
  return module
76+
77+
@utils.classproperty
78+
@classmethod
79+
def PIL_Image(cls): # pylint: disable=invalid-name
7580
# TiffImagePlugin need to be activated explicitly on some systems
7681
# https://github.com/python-pillow/Pillow/blob/5.4.x/src/PIL/Image.py#L407
7782
_try_import("PIL.TiffImagePlugin")

tensorflow_datasets/core/lazy_imports_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class LazyImportsTest(testing.TestCase, parameterized.TestCase):
3232
"matplotlib",
3333
"mwparserfromhell",
3434
"os",
35+
"pandas",
3536
"pretty_midi",
3637
"pydub",
3738
"scipy",

tensorflow_datasets/image/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from tensorflow_datasets.image.oxford_flowers102 import OxfordFlowers102
5454
from tensorflow_datasets.image.oxford_iiit_pet import OxfordIIITPet
5555
from tensorflow_datasets.image.patch_camelyon import PatchCamelyon
56+
from tensorflow_datasets.image.pet_finder import PetFinder
5657
from tensorflow_datasets.image.quickdraw import QuickdrawBitmap
5758
from tensorflow_datasets.image.resisc45 import Resisc45
5859
from tensorflow_datasets.image.rock_paper_scissors import RockPaperScissors
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""PetFinder Dataset."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
import os
23+
import tensorflow as tf
24+
import tensorflow_datasets.public_api as tfds
25+
26+
# BibTeX citation for the Kaggle / PetFinder.my adoption-prediction data.
_CITATION = """
@ONLINE {kaggle-petfinder-adoption-prediction,
author = "Kaggle and PetFinder.my",
title = "PetFinder.my Adoption Prediction",
month = "april",
year = "2019",
url = "https://www.kaggle.com/c/petfinder-adoption-prediction/data/"
}
"""

# Base URL that every downloadable resource name is appended to.
_URL = "https://storage.googleapis.com/petfinder_dataset/"

# Resource names fetched as "<name>.zip".
_DATA_OPTIONS = [
    "test_metadata", "test_images", "test_sentiment", "train_metadata",
    "train_images", "train_sentiment"
]
# Resource names fetched as "<name>.csv".
_LABEL_OPTIONS = [
    "test", "train", "breed_labels", "state_labels", "color_labels"
]

# Maps each resource name to its full download URL.
_DL_URLS = {name: _URL + name + ".zip" for name in _DATA_OPTIONS}
_DL_URLS.update({label: _URL + label + ".csv" for label in _LABEL_OPTIONS})

# CSV columns exposed as int64 values under the "attributes" feature.
_INT_FEATS = [
    "Type", "Age", "Breed1", "Breed2", "Gender", "Color1", "Color2", "Color3",
    "MaturitySize", "FurLength", "Vaccinated", "Dewormed", "Sterilized",
    "Health", "Quantity", "Fee", "State", "VideoAmt"
]
# The two lists below are not referenced by the builder code in this module.
_FLOAT_FEATS = ["PhotoAmt"]
# Spelled to match the CSV header ("Name", "RescuerID"); the previous values
# "name" and "RescurID" matched no column.
# NOTE(review): "Type" is also listed in _INT_FEATS -- confirm which is meant.
_OBJ_FEATS = ["Name", "Type", "PetID", "RescuerID"]

# Human-readable dataset description (note the space joining the sentences).
_DESCRIPTION = (
    "A large set of images of cats and dogs. "
    "Together with the metadata information of sentiment information.")
59+
60+
61+
class PetFinder(tfds.core.GeneratorBasedBuilder):
  """Builder for the PetFinder dataset.

  Each example is one pet image together with the integer attributes of the
  pet it belongs to (looked up by `PetID` in the split's CSV file) and the
  pet's `AdoptionSpeed` value as the label.
  """

  VERSION = tfds.core.Version(
      "1.0.0", experiments={tfds.core.Experiment.S3: True})

  def _info(self):
    """Returns the `tfds.core.DatasetInfo` for this dataset."""
    return tfds.core.DatasetInfo(
        builder=self,
        # Use the module-level description. The previous inline text pointed
        # readers at a "config name", but this builder defines no configs.
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "image": tfds.features.Image(),
            "image/filename": tfds.features.Text(),
            "PetID": tfds.features.Text(),
            "attributes": {name: tf.int64 for name in _INT_FEATS},
            "label": tfds.features.ClassLabel(num_classes=5),
        }),
        supervised_keys=("attributes", "label"),
        urls=[_URL],
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Returns SplitGenerators.

    Args:
      dl_manager: download manager used to download and extract every URL
        in `_DL_URLS`.

    Returns:
      A list with the TRAIN and TEST `tfds.core.SplitGenerator`s.
    """
    # The result is indexed by the same names used as keys of `_DL_URLS`.
    dl_paths = dl_manager.download_and_extract(_DL_URLS)

    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            num_shards=10,
            gen_kwargs={
                "csv_name": "train.csv",
                "csv_paths": dl_paths["train"],
                "img_paths": dl_paths["train_images"],
            },
        ),
        tfds.core.SplitGenerator(
            name=tfds.Split.TEST,
            num_shards=10,
            gen_kwargs={
                "csv_name": "test.csv",
                "csv_paths": dl_paths["test"],
                "img_paths": dl_paths["test_images"],
            },
        ),
    ]

  def _generate_examples(self, csv_name, csv_paths, img_paths):
    """Yields examples.

    Args:
      csv_name: file name for the csv file used in the split; "test.csv"
        triggers the dummy-label handling below.
      csv_paths: Path to the csv file containing the label and attributes
        information.
      img_paths: Path to the directory whose entries are the images.

    Yields:
      `(image file name, record)` pairs when the builder version implements
      the S3 experiment, otherwise the bare `record` dict.

    Raises:
      AssertionError: if `csv_paths` does not exist.
    """
    pd = tfds.core.lazy_imports.pandas

    if not tf.io.gfile.exists(csv_paths):
      raise AssertionError("{} not exist".format(csv_name))
    with tf.io.gfile.GFile(csv_paths) as csv_file:
      dataframe = pd.read_csv(csv_file)
    # add a dummy label for test set
    if csv_name == "test.csv":
      dataframe["AdoptionSpeed"] = -1

    # The experiment check does not depend on the image, so evaluate it once.
    use_s3 = self.version.implements(tfds.core.Experiment.S3)

    for image in tf.io.gfile.listdir(img_paths):
      # The text before the first "-" in the file name is used as the PetID.
      # NOTE(review): assumes names look like "<PetID>-<n>.<ext>" -- confirm.
      pet_id = image.split("-")[0]
      image_path = os.path.join(img_paths, image)
      rows = dataframe.loc[dataframe["PetID"] == pet_id]
      # Indexing with [0] below raises IndexError when no CSV row matches
      # this image's PetID.
      record = {
          "image": image_path,
          "image/filename": image,
          "PetID": pet_id,
          "attributes": rows[_INT_FEATS].to_dict("records")[0],
          "label": rows["AdoptionSpeed"].values[0]
      }
      if use_s3:
        yield image, record
      else:
        yield record
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Test for PetFinder."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
from tensorflow_datasets import testing
23+
from tensorflow_datasets.image import pet_finder
24+
25+
26+
class PetFinderTest(testing.DatasetBuilderTestCase):
  """Dataset builder test case for `pet_finder.PetFinder`."""

  DATASET_CLASS = pet_finder.PetFinder

  # Number of fake examples expected in each split.
  SPLITS = dict(train=2, test=2)

  # Fake result of download-and-extract: resource name -> fixture path.
  DL_EXTRACT_RESULT = dict(
      train='train.csv',
      train_images='train_images',
      test='test.csv',
      test_images='test_images',
  )
39+
40+
41+
if __name__ == "__main__":
  # Run the tests through the tensorflow_datasets testing entry point when
  # this file is executed as a script.
  testing.test_main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,Vaccinated,Dewormed,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt
2+
2,Dopey & Grey,8,266,266,1,2,6,7,1,1,1,1,2,1,2,0,41326,2ece3b2573dcdcebd774e635dca15fd9,0,"Dopey Age: 8mths old Male One half of a pair, Dopey is the reserved one compared to his brother Grey. However, he loves to be petted and is active by nature. Loves to chase balls and plays with anything that is mobile. Favourite hobby: Watching TV near the TV screen. Grey Age: 8mths old Male The wonder twin - Grey and Dopey are very brotherly and protects each other. Grey is more dominant than Dopey as he is the elder one and he is very playful. Favourite hobby: Loves to sit by the door and look outside",000000000,2.0
3+
2,Chi Chi,36,285,264,2,1,4,7,2,3,1,1,1,2,1,0,41326,2ece3b2573dcdcebd774e635dca15fd9,0,"Please note that Chichi has been neutered, therefore cannot breed. ChiChi is a Persian with a difference : She is a silent cat. She loves to be petted but needs regular grooming and cleaning. She has a defective tearduct on the right eye that requires daily cleaning. She has been neutered and goes through vaccinated routine regularly. Favourite hobby: Loves to roam and enjoys outside scenery. Please email if interested, comments are harder to keep track of.",aaaaaaaaa,1.0
Loading
Loading
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,Vaccinated,Dewormed,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt,AdoptionSpeed
2+
1,Alger,3,307,0,1,1,2,7,2,2,1,1,2,1,1,0,41326,fa90fa5b1ee11c86938398b60abc32cb,0,He is very intelligent and cute. Fluffy and looks much better in real life than in the photo. He deserves a good home. No tying or caging for long hours except for precautionary purposes Serious adopter pls call ,ddddddddd,7.0,2
3+
1,Terry,24,179,307,1,2,3,7,2,2,3,3,2,1,1,0,41326,719987dce7aeb027fdfa91b480800199,0,been at my place for a while..am hoping to find it a good home,666666666,0.0,4

0 commit comments

Comments
 (0)