Skip to content

Commit babfc50

Browse files
Conchylicultorcopybara-github
authored andcommitted
Update dataset documentation template
PiperOrigin-RevId: 295789042
1 parent 3108149 commit babfc50

File tree

3 files changed

+190
-118
lines changed

3 files changed

+190
-118
lines changed

tensorflow_datasets/scripts/document_datasets_test.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,25 @@
1818
from __future__ import print_function
1919

2020
from tensorflow_datasets import testing
21+
from tensorflow_datasets.core import dataset_builder
22+
from tensorflow_datasets.core import utils
2123
from tensorflow_datasets.scripts import document_datasets
2224

2325
DummyMnist = testing.DummyMnist
2426

2527

28+
class DummyMnistConfigs(DummyMnist):
29+
"""Builder with config and manual instructions."""
30+
MANUAL_DOWNLOAD_INSTRUCTIONS = """Some manual instructions."""
31+
BUILDER_CONFIGS = [
32+
dataset_builder.BuilderConfig(
33+
name="config_name",
34+
version=utils.Version("0.0.1"),
35+
description="Config description.",
36+
),
37+
]
38+
39+
2640
class DocumentDatasetsTest(testing.TestCase):
2741

2842
@classmethod
@@ -50,6 +64,16 @@ def test_schema_org(self):
5064
'content="https://www.tensorflow.org'
5165
'/datasets/catalog/%s" />' % self.builder.name, schema_str)
5266

67+
def test_with_config(self):
68+
"""Test that builder with configs are correctly generated."""
69+
with testing.tmp_dir() as tmp_dir:
70+
builder = DummyMnistConfigs(data_dir=tmp_dir)
71+
builder.download_and_prepare()
72+
doc_str = document_datasets.document_single_builder(builder)
73+
74+
self.assertIn("Some manual instructions.", doc_str)
75+
self.assertIn("Mnist description.", doc_str) # Shared description.
76+
self.assertIn("Config description.", doc_str) # Config-specific description
5377

5478
if __name__ == "__main__":
5579
testing.test_main()
Lines changed: 165 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -1,159 +1,206 @@
11
<%!
2+
"""Dataset catalog documentation template.
3+
4+
Displayed in https://www.tensorflow.org/datasets/catalog/.
5+
6+
"""
7+
8+
import collections
29
import tensorflow_datasets as tfds
310
from tensorflow_datasets.core.utils.py_utils import get_class_path
411
from tensorflow_datasets.core.utils.py_utils import get_class_url
12+
513
%>
614

7-
## Print URLs
8-
<%def name="display_homepage(builder, level)">\
9-
${'#' * level} Homepage
10-
* [${builder.info.homepage}](${builder.info.homepage})
15+
<%def name="echo(obj)">\
16+
${obj}
1117
</%def>
1218

13-
## Print features
14-
<%def name="display_features(builder, level)">\
15-
${'#' * level} Features
16-
```python
17-
${builder.info.features}
18-
```
19+
## --------------------------- Builder sections ---------------------------
20+
21+
<%def name="display_description(builder)">\
22+
* **Description**:
23+
24+
${builder.info.description}
25+
1926
</%def>
2027

21-
## Print Supervised keys
22-
<%def name="display_supervised_keys(builder, level)">\
23-
%if builder.info.supervised_keys:
24-
${'#' * level} Supervised keys (for `as_supervised=True`)
25-
`${str(builder.info.supervised_keys)}`
26-
%endif
28+
<%def name="display_config_description(builder)">\
29+
% if builder.builder_config:
30+
* **Config description**: ${builder.builder_config.description}
31+
% endif
32+
</%def>
33+
34+
<%def name="display_homepage(builder)">\
35+
* **Homepage**: [${builder.info.homepage}](${builder.info.homepage})
36+
</%def>
37+
38+
<%def name="display_source(builder)">\
39+
* **Source code**: [`${get_class_path(builder)}`](${get_class_url(builder)})
2740
</%def>
2841

29-
## Print list of supported versions minus default.
30-
<%def name="supported_versions(builder, level)">\
42+
<%def name="display_versions(builder)">\
3143
<%
32-
versions = (builder.SUPPORTED_VERSIONS if hasattr(builder, 'SUPPORTED_VERSIONS')
33-
else builder.supported_versions)
44+
def list_versions(builder):
45+
# List all available versions
46+
# Sort them in order
47+
# Get the default version
48+
for v in builder.versions:
49+
if v == builder.version: # Highlight the default version
50+
version_name = '**`{}`** (default)'.format(str(v))
51+
else:
52+
version_name = '`{}`'.format(str(v))
53+
yield '{}: {}'.format(version_name, v.description or 'No release notes.')
3454
%>\
35-
%for version in versions:
36-
${' '*level|n}* `${str(version)}`: ${version.description}
37-
%endfor
55+
* **Versions**:
56+
% for version_str in list_versions(builder):
57+
* ${version_str}
58+
% endfor
3859
</%def>
3960

40-
## Print the bullet points + features specific to builder with a single version.
41-
<%def name="print_general_info_one_config(builder)">
42-
${display_description(builder)}
61+
<%def name="display_size(builder)">\
62+
* **Download size**: `${tfds.units.size_str(builder.info.download_size)}`
63+
* **Dataset size**: `${tfds.units.size_str(builder.info.dataset_size)}`
64+
</%def>
4365

44-
* URL: [${builder.info.homepage}](${builder.info.homepage})
45-
* `DatasetBuilder`: [`${get_class_path(builder)}`](${get_class_url(builder)})
46-
* Version: `v${str(builder.info.version)}`
47-
* Versions:
48-
* **`${builder.info.version}`** (default): ${builder.info.version.description or ''}
49-
${supported_versions(builder, level=1)}
50-
* Download size: `${tfds.units.size_str(builder.info.download_size)}`
51-
* Dataset size: `${tfds.units.size_str(builder.info.dataset_size)}`
66+
<%def name="display_manual(builder)">\
67+
% if builder.MANUAL_DOWNLOAD_INSTRUCTIONS:
68+
* **Manual download instructions**: This dataset requires you to download the
69+
source data manually into `download_config.manual_dir`
70+
(defaults to `~/tensorflow_datasets/manual/${builder.info.name}/`):<br/>
71+
${builder.MANUAL_DOWNLOAD_INSTRUCTIONS}
72+
% endif
73+
</%def>
5274

53-
%if builder.MANUAL_DOWNLOAD_INSTRUCTIONS:
54-
WARNING: This dataset requires you to download the source data manually into manual_dir
55-
(defaults to `~/tensorflow_datasets/manual/${builder.info.name}/`):
56-
${builder.MANUAL_DOWNLOAD_INSTRUCTIONS}
57-
%endif
75+
<%def name="display_splits(builder)">\
76+
* **Splits**:
77+
<%
78+
def get_num_examples(split_info):
79+
if split_info.num_examples:
80+
return '{:,}'.format(split_info.num_examples)
81+
else:
82+
return 'Not computed'
83+
%>\
84+
85+
Split | Examples
86+
:----- | -------:
87+
%for split_name, split_info in sorted(builder.info.splits.items()):
88+
'${split_name}' | ${get_num_examples(split_info)}
89+
%endfor
5890

59-
${display_features(builder, level=2)}
60-
${display_stats(builder, level=2)}
61-
${display_homepage(builder, level=2)}
62-
${display_supervised_keys(builder, level=2)}
63-
${display_citation(builder.info.citation, level=2)}
6491
</%def>
6592

66-
## Print the configs: list with name/version/size/description + doc for each.
67-
<%def name="print_builder_configs(builder, config_builders)">
68-
<%
69-
len_conf_descs = len(set([c.description for c in builder.BUILDER_CONFIGS] + [
70-
builder.info.description]))
71-
%>
72-
%if len_conf_descs == 1 or len_conf_descs > len(builder.BUILDER_CONFIGS):
73-
${display_description(builder)}
74-
%endif
93+
<%def name="display_features(builder)">\
94+
* **Features**:
7595

76-
* URL: [${builder.info.homepage}](${builder.info.homepage})
77-
* `DatasetBuilder`: [`${get_class_path(builder)}`](${get_class_url(builder)})
96+
```python
97+
${builder.info.features}
98+
```
99+
</%def>
100+
101+
<%def name="display_supervised(builder)">\
102+
* **Supervised keys** (See
103+
[`as_supervised` doc](https://www.tensorflow.org/datasets/api_docs/python/tfds/load)):
104+
`${str(builder.info.supervised_keys)}`
105+
</%def>
106+
107+
<%def name="display_citation(builder)">\
108+
% if builder.info.citation:
109+
* **Citation**:
78110

79-
`${builder.name}` is configured with `${get_class_path(builder.builder_config)}` and has
80-
the following configurations predefined (defaults to the first one):
111+
```
112+
${builder.info.citation}
113+
```
114+
% endif
115+
</%def>
81116

82-
%for config, config_builder in zip(builder.BUILDER_CONFIGS, config_builders):
83117
<%
84-
size = tfds.units.size_str(config_builder.info.dataset_size)
118+
119+
Section = collections.namedtuple('Section', 'get_signature, make')
120+
121+
# Getter function returns a hashable signature of the section value
122+
# which allows detecting sections shared across all builders.
123+
def get_description(builder):
  """Returns the builder's dataset description as the section signature.

  The value is collected into a `set` across builders to decide whether the
  section is shared, so it must actually be returned (the previous body was a
  bare expression and always yielded `None`).
  """
  return builder.info.description
124+
def get_config_description(builder):
125+
return builder.builder_config.description
126+
def get_homepage(builder):
  """Returns the builder's homepage URL as the section signature.

  Without an explicit `return` the getter yielded `None` for every builder,
  so differing homepages could never be detected.
  """
  return builder.info.homepage
127+
def get_source(builder):
  """Returns a constant signature for the source-code section.

  The `builder` argument is intentionally unused: the section is always
  common to all configs, so every builder maps to the same value.
  """
  return True  # Always common to all configs
128+
def get_versions(builder):
129+
return tuple((str(v), v.description) for v in builder.versions)
130+
def get_size(builder):
  """Returns `(download_size, dataset_size)` as the section signature.

  The tuple must be returned so builders with different sizes produce
  different signatures; the previous bare expression always yielded `None`.
  """
  return (builder.info.download_size, builder.info.dataset_size)
131+
def get_manual(builder):
  """Returns the builder's manual download instructions as the signature.

  Reads the `MANUAL_DOWNLOAD_INSTRUCTIONS` attribute and returns it
  unchanged (the previous body evaluated it but returned `None`).
  """
  return builder.MANUAL_DOWNLOAD_INSTRUCTIONS
132+
def get_splits(builder):
133+
return tuple(
134+
(str(s.name), int(s.num_examples)) for s in builder.info.splits.values()
135+
)
136+
def get_features(builder):
  """Returns `repr(builder.info.features)` as the section signature.

  The `repr` string is used rather than the features object itself so the
  signature is a plain string that can be stored in a `set`.
  """
  return repr(builder.info.features)
137+
def get_supervised(builder):
  """Returns the builder's supervised keys as the section signature.

  The value of `builder.info.supervised_keys` is returned unchanged (the
  previous body evaluated it but returned `None`).
  """
  return builder.info.supervised_keys
138+
def get_citation(builder):
  """Returns the builder's citation text as the section signature.

  The value of `builder.info.citation` is returned unchanged (the previous
  body evaluated it but returned `None`).
  """
  return builder.info.citation
139+
140+
all_sections = [
141+
Section(get_description, display_description),
142+
Section(get_config_description, display_config_description),
143+
Section(get_homepage, display_homepage),
144+
Section(get_source, display_source),
145+
Section(get_versions, display_versions),
146+
Section(get_size, display_size),
147+
Section(get_manual, display_manual),
148+
Section(get_splits, display_splits),
149+
Section(get_features, display_features),
150+
Section(get_supervised, display_supervised),
151+
Section(get_citation, display_citation),
152+
]
153+
85154
%>
86-
* `${config.name}` (`v${str(config.version)}`) (`Size: ${size}`): ${config.description}
87-
%endfor
88155

89-
%for config, config_builder in zip(builder.BUILDER_CONFIGS, config_builders):
90-
${'##'} `${builder.name}/${config.name}`
91-
${config.description}
156+
## --------------------------- Single builder ---------------------------
92157

93-
Versions:
158+
<%def name="display_builder(builder, sections)">\
159+
% for section in sections:
160+
${section.make(builder)}\
161+
% endfor
162+
</%def>
94163

95-
* **`${config.version}`** (default): ${getattr(config.version, 'description', '') or ''}
96-
${supported_versions(config, level=0)}
164+
## --------------------------- Builder builder ---------------------------
97165

98-
%if builder.MANUAL_DOWNLOAD_INSTRUCTIONS:
99-
WARNING: This dataset requires you to download the source data manually into manual_dir
100-
(defaults to `~/tensorflow_datasets/manual/${builder.info.name}/`):
101-
${builder.MANUAL_DOWNLOAD_INSTRUCTIONS}
102-
%endif
166+
<%def name="display_all_builders(builders)">\
167+
<%
103168

104-
${display_stats(config_builder, level=3)}
105-
${display_features(config_builder, level=3)}
106-
${display_homepage(config_builder, level=3)}
107-
${display_supervised_keys(config_builder, level=3)}
108-
%endfor
109-
${display_citation(config_builder.info.citation, level=2)}
110-
</%def>
169+
# For each section, determine whether it is shared by all builders or unique to each one.
170+
common_sections = []
171+
unique_sections = []
172+
for section in all_sections:
173+
if len(set(section.get_signature(b) for b in builders)) == 1:
174+
common_sections.append(section)
175+
else:
176+
unique_sections.append(section)
111177

112-
## Display the description of a builder.
113-
<%def name="display_description(builder)">\
114-
${builder.info.description}
115-
</%def>
178+
%>
116179

117-
## Display stats for a split.
118-
<%def name="display_stats(builder, level)">\
180+
${display_builder(next(iter(builders)), common_sections)}
181+
182+
% for i, builder in enumerate(builders):
119183
<%
120-
splits = builder.info.splits
121-
size_name = [(split_info.num_examples, split_name)
122-
for (split_name, split_info) in splits.items()]
184+
header_suffix = ' (default config)' if i == 0 else ''
123185
%>\
124-
${'#' * level} Statistics
125-
%if builder.info.splits.total_num_examples:
126-
Split | Examples
127-
:----- | ---:
128-
ALL | ${"{:,}".format(splits.total_num_examples)}
129-
%for split_size, split_name in sorted(size_name, key=lambda x:(-x[0], x[1])):
130-
${split_name.upper()} | ${"{:,}".format(split_size)}
131-
%endfor
132-
%else:
133-
None computed
134-
%endif
135-
</%def>
186+
${'##'} ${builder.name}/${builder.builder_config.name}${header_suffix}
136187

137-
## Display a citation.
138-
<%def name="display_citation(citation, level)">\
139-
%if citation:
140-
${'#' * level} Citation
141-
```
142-
${citation}
143-
```
144-
%endif
188+
${display_builder(builder, unique_sections)}
189+
% endfor
145190
</%def>
146191

147-
%if builder.MANUAL_DOWNLOAD_INSTRUCTIONS:
148-
# `${builder.name}` (Manual download)
149-
%else:
150-
# `${builder.name}`
151-
%endif
192+
## --------------------------- Main page ---------------------------
152193

194+
${'#'} `${builder.name}`
153195

154-
%if builder.builder_config:
155-
${print_builder_configs(builder, config_builders)}
156-
%else:
157-
${print_general_info_one_config(builder)}
196+
%if builder.MANUAL_DOWNLOAD_INSTRUCTIONS:
197+
Warning: Manual download required. See instructions below.
158198
%endif
159-
---
199+
200+
<%doc>First case: Single builder.</%doc>\
201+
% if not builder.builder_config:
202+
${display_builder(builder, all_sections)}
203+
<%doc>Second case: Builder configs.</%doc>\
204+
% else:
205+
${display_all_builders(config_builders)}
206+
% endif

tensorflow_datasets/testing/test_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ def _info(self):
401401
"image": features.Image(shape=(28, 28, 1)),
402402
"label": features.ClassLabel(num_classes=10),
403403
}),
404+
description="Mnist description.",
404405
)
405406

406407
def _split_generators(self, dl_manager):

0 commit comments

Comments
 (0)