Skip to content

Commit e41e984

Browse files
TensorFlow Datasets Team, tfds-copybara
authored and committed
Internal change
PiperOrigin-RevId: 255940384
1 parent d992821 commit e41e984

File tree

10 files changed

+211
-0
lines changed

10 files changed

+211
-0
lines changed

tensorflow_datasets/image/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
from tensorflow_datasets.image.oxford_flowers102 import OxfordFlowers102
5454
from tensorflow_datasets.image.oxford_iiit_pet import OxfordIIITPet
5555
from tensorflow_datasets.image.patch_camelyon import PatchCamelyon
56+
from tensorflow_datasets.image.pet_finder import PetFinder
5657
from tensorflow_datasets.image.quickdraw import QuickdrawBitmap
5758
from tensorflow_datasets.image.resisc45 import Resisc45
5859
from tensorflow_datasets.image.rock_paper_scissors import RockPaperScissors
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""PetFinder Dataset."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
import os
23+
import pandas as pd
24+
import tensorflow as tf
25+
import tensorflow_datasets.public_api as tfds
26+
27+
# petfinder: BibTeX citation
28+
_CITATION = """
29+
@ONLINE {kaggle-petfinder-adoption-prediction,
30+
author = "Kaggle and PetFinder.my",
31+
title = "PetFinder.my Adoption Prediction",
32+
month = "april",
33+
year = "2019",
34+
url = "https://www.kaggle.com/c/petfinder-adoption-prediction/data/"
35+
}
36+
"""
37+
38+
# Base URL under which every archive and csv file of the dataset is hosted.
_URL = "https://storage.googleapis.com/petfinder_dataset/"

# Resources published as `<name>.zip` archives.
_DATA_OPTIONS = [
    "test_metadata", "test_images", "test_sentiment", "train_metadata",
    "train_images", "train_sentiment"
]
# Resources published as plain `<name>.csv` files.
_LABEL_OPTIONS = [
    "test", "train", "breed_labels", "state_labels", "color_labels"
]

# Maps every resource name to its download URL (archives first, then csv).
_DL_URLS = {name: _URL + name + ".zip" for name in _DATA_OPTIONS}
_DL_URLS.update({label: _URL + label + ".csv" for label in _LABEL_OPTIONS})

# Csv columns exposed as int64 entries of the "attributes" feature.
_INT_FEATS = [
    "Type", "Age", "Breed1", "Breed2", "Gender", "Color1", "Color2", "Color3",
    "MaturitySize", "FurLength", "Vaccinated", "Dewormed", "Sterilized",
    "Health", "Quantity", "Fee", "State", "VideoAmt"
]
_FLOAT_FEATS = ["PhotoAmt"]
# Column names spelled as in the csv header ("Name", "RescuerID"); the
# previous "name" / "RescurID" entries matched no column.
_OBJ_FEATS = ["Name", "Type", "PetID", "RescuerID"]
# Note the space after the first sentence: adjacent string literals are
# concatenated verbatim.
_DESCRIPTION = (
    "A large set of images of cats and dogs. "
    "Together with the metadata information of sentiment information.")
60+
61+
62+
class PetFinder(tfds.core.GeneratorBasedBuilder):
  """Dataset builder for the PetFinder.my adoption prediction data.

  Every example is a single pet photo together with the integer attributes
  of the pet it belongs to and that pet's `AdoptionSpeed` label.
  """

  VERSION = tfds.core.Version("1.0.0")
  SUPPORTED_VERSIONS = [
      tfds.core.Version("1.0.0", experiments={tfds.core.Experiment.S3: True}),
  ]

  def _info(self):
    """Returns the `tfds.core.DatasetInfo` describing features and label."""
    return tfds.core.DatasetInfo(
        builder=self,
        # The earlier text referred to per-config classes, but this builder
        # defines no configs; use the module-level dataset description.
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "image": tfds.features.Image(),
            "image/filename": tfds.features.Text(),
            "PetID": tfds.features.Text(),
            "attributes": {name: tf.int64 for name in _INT_FEATS},
            "label": tfds.features.ClassLabel(num_classes=5),
        }),
        supervised_keys=("attributes", "label"),
        urls=[_URL],
        citation=_CITATION,
    )

  def _split_generators(self, dl_manager):
    """Downloads every resource in `_DL_URLS` and defines train/test splits.

    Args:
      dl_manager: download manager; its `download_and_extract` is called with
        the `_DL_URLS` dict and the result is indexed by the same keys.

    Returns:
      A list with the TRAIN and TEST `tfds.core.SplitGenerator`s.
    """
    dl_paths = dl_manager.download_and_extract(_DL_URLS)

    return [
        tfds.core.SplitGenerator(
            name=tfds.Split.TRAIN,
            num_shards=10,
            gen_kwargs={
                "csv_name": "train.csv",
                "csv_paths": dl_paths["train"],
                "img_paths": dl_paths["train_images"],
            },
        ),
        tfds.core.SplitGenerator(
            name=tfds.Split.TEST,
            num_shards=10,
            gen_kwargs={
                "csv_name": "test.csv",
                "csv_paths": dl_paths["test"],
                "img_paths": dl_paths["test_images"],
            },
        ),
    ]

  def _generate_examples(self, csv_name, csv_paths, img_paths):
    """Yields one example per image file found in `img_paths`.

    Args:
      csv_name: file name for the csv file used in the split ("train.csv" or
        "test.csv"); used for error messages and to detect the test split.
      csv_paths: Path to the csv file containing the label and attributes
        information.
      img_paths: Path to the directory of images. File names are expected to
        start with the pet id followed by "-".

    Yields:
      `(image file name, record)` pairs when the S3 experiment is enabled for
      the current version, otherwise just the record dict.

    Raises:
      AssertionError: if `csv_paths` does not exist.
      ValueError: if an image refers to a PetID that has no row in the csv.
    """
    if not tf.io.gfile.exists(csv_paths):
      raise AssertionError("{} not exist".format(csv_name))
    with tf.io.gfile.GFile(csv_paths) as csv_file:
      # Read PetID as text: ids are compared with file-name prefixes below,
      # and type inference would turn all-numeric ids into integers.
      dataframe = pd.read_csv(csv_file, dtype={"PetID": str})
    # add a dummy label for test set
    if csv_name == "test.csv":
      dataframe["AdoptionSpeed"] = -1

    # Build the PetID lookups once instead of scanning the frame per image.
    # drop_duplicates keeps the first row per id, matching a first-match scan.
    by_pet = dataframe.drop_duplicates(subset="PetID").set_index("PetID")
    attributes = by_pet[_INT_FEATS].to_dict("index")
    labels = by_pet["AdoptionSpeed"].to_dict()

    emit_keys = self.version.implements(tfds.core.Experiment.S3)
    # Sorted so that the generation order does not depend on the filesystem.
    for image in sorted(tf.io.gfile.listdir(img_paths)):
      pet_id = image.split("-")[0]
      if pet_id not in labels:
        raise ValueError(
            "No row with PetID {!r} in {} (needed for image {})".format(
                pet_id, csv_name, image))
      record = {
          "image": os.path.join(img_paths, image),
          "image/filename": image,
          "PetID": pet_id,
          "attributes": attributes[pet_id],
          "label": labels[pet_id],
      }
      if emit_keys:
        yield image, record
      else:
        yield record
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# coding=utf-8
2+
# Copyright 2019 The TensorFlow Datasets Authors.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Test for PetFinder."""
17+
18+
from __future__ import absolute_import
19+
from __future__ import division
20+
from __future__ import print_function
21+
22+
from tensorflow_datasets import testing
23+
from tensorflow_datasets.image import pet_finder
24+
25+
26+
class PetFinderTest(testing.DatasetBuilderTestCase):
  """Test case for the PetFinder dataset builder."""

  DATASET_CLASS = pet_finder.PetFinder
  # Number of fake examples expected in each split.
  SPLITS = dict(train=2, test=2)
  # Keys match the entries the builder reads from its download result.
  DL_EXTRACT_RESULT = dict(
      train='train.csv',
      train_images='train_images',
      test='test.csv',
      test_images='test_images',
  )
39+
40+
41+
class PetFinderS3Test(PetFinderTest):
  """Same checks as PetFinderTest with VERSION set to '1.0.0'."""

  VERSION = '1.0.0'
43+
44+
45+
if __name__ == '__main__':
46+
testing.test_main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,Vaccinated,Dewormed,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt
2+
2,Dopey & Grey,8,266,266,1,2,6,7,1,1,1,1,2,1,2,0,41326,2ece3b2573dcdcebd774e635dca15fd9,0,"Dopey Age: 8mths old Male One half of a pair, Dopey is the reserved one compared to his brother Grey. However, he loves to be petted and is active by nature. Loves to chase balls and plays with anything that is mobile. Favourite hobby: Watching TV near the TV screen. Grey Age: 8mths old Male The wonder twin - Grey and Dopey are very brotherly and protects each other. Grey is more dominant than Dopey as he is the elder one and he is very playful. Favourite hobby: Loves to sit by the door and look outside",000000000,2.0
3+
2,Chi Chi,36,285,264,2,1,4,7,2,3,1,1,1,2,1,0,41326,2ece3b2573dcdcebd774e635dca15fd9,0,"Please note that Chichi has been neutered, therefore cannot breed. ChiChi is a Persian with a difference : She is a silent cat. She loves to be petted but needs regular grooming and cleaning. She has a defective tearduct on the right eye that requires daily cleaning. She has been neutered and goes through vaccinated routine regularly. Favourite hobby: Loves to roam and enjoys outside scenery. Please email if interested, comments are harder to keep track of.",aaaaaaaaa,1.0
Loading
Loading
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,Vaccinated,Dewormed,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt,AdoptionSpeed
2+
1,Alger,3,307,0,1,1,2,7,2,2,1,1,2,1,1,0,41326,fa90fa5b1ee11c86938398b60abc32cb,0,He is very intelligent and cute. Fluffy and looks much better in real life than in the photo. He deserves a good home. No tying or caging for long hours except for precautionary purposes Serious adopter pls call ,ddddddddd,7.0,2
3+
1,Terry,24,179,307,1,2,3,7,2,2,3,3,2,1,1,0,41326,719987dce7aeb027fdfa91b480800199,0,been at my place for a while..am hoping to find it a good home,666666666,0.0,4
Loading
Loading
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
https://storage.googleapis.com/petfinder_dataset/breed_labels.csv 6984 88d869b56a8325c2d5e3a9ebcd2c8faa2129b448c600f630b23fd1700a6e0d2a
2+
https://storage.googleapis.com/petfinder_dataset/color_labels.csv 88 63fe1ef79e344ed85c8b3595438ecbec434e793c5c74a65c6d6cb7f04d1c3f5e
3+
https://storage.googleapis.com/petfinder_dataset/state_labels.csv 285 088a4b4d1464fa8e7209f43ab91fa0734a52a5970767a3c8b264ffa22579199a
4+
https://storage.googleapis.com/petfinder_dataset/test.csv 1647264 49839d898e4b5db68bddf8d8876d29233eb0b10c5cc23963296d964f0ca17ac9
5+
https://storage.googleapis.com/petfinder_dataset/test_images.zip 389440419 d8dc0914e6ad894a20e41ce86e3d392dfc2a2eb968b792358a80d6b3c3d4b5df
6+
https://storage.googleapis.com/petfinder_dataset/test_metadata.zip 13909287 ad9ee23eebddd270b33e1566649cda0310c65d580b3c5fcd644642d9fa736f6e
7+
https://storage.googleapis.com/petfinder_dataset/test_sentiment.zip 3026657 1435b837d361ded939ea87b6b978870497055b8ef810a7381524c21fb62c261e
8+
https://storage.googleapis.com/petfinder_dataset/train.csv 6690166 589f0edc5cac690b64e123528025f7a5a571ffa096f620260187917b1c7c6e2b
9+
https://storage.googleapis.com/petfinder_dataset/train_images.zip 1595336815 64779a06655e62902a1b9081543d315e896ea5a8cdf568509110e2de42112f90
10+
https://storage.googleapis.com/petfinder_dataset/train_metadata.zip 56196604 6eb691ac7636eed691d1d06ec64864d0dede9a0515e467bbff889cd3d233975d
11+
https://storage.googleapis.com/petfinder_dataset/train_sentiment.zip 11878318 b906f664913239480ae3ec0a0150ea1cfedaf05f0bb9132713d1e40513b4cce4

0 commit comments

Comments
 (0)