Skip to content

Commit 3065442

Browse files
pierrot0 and copybara-github
authored and committed
introduce shard_lengths in dataset_info proto.
PiperOrigin-RevId: 251832533
1 parent 2a7f229 commit 3065442

File tree

2 files changed

+23
-12
lines changed

2 files changed

+23
-12
lines changed

tensorflow_datasets/core/proto/dataset_info.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,13 @@ message SplitInfo {
2626

2727
// The number of shards in this split's on-disk representation.
2828
int64 num_shards = 2;
29+
// The number of examples in each shard.
30+
repeated int64 shard_lengths = 4;
2931

3032
// The concrete statistics about this split.
3133
tensorflow.metadata.v0.DatasetFeatureStatistics statistics = 3;
34+
35+
// Next available: 5.
3236
}
3337

3438
// This message indicates which feature in the dataset schema is the input and

tensorflow_datasets/core/proto/dataset_info_generated_pb2.py

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
package='tensorflow_datasets',
3939
syntax='proto3',
4040
serialized_options=_b('\370\001\001'),
41-
serialized_pb=_b('\n\x12\x64\x61taset_info.proto\x12\x13tensorflow_datasets\x1a-tensorflow_metadata/proto/v0/statistics.proto\x1a)tensorflow_metadata/proto/v0/schema.proto\"\x1f\n\x0f\x44\x61tasetLocation\x12\x0c\n\x04urls\x18\x01 \x03(\t\"s\n\tSplitInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nnum_shards\x18\x02 \x01(\x03\x12\x44\n\nstatistics\x18\x03 \x01(\x0b\x32\x30.tensorflow.metadata.v0.DatasetFeatureStatistics\"/\n\x0eSupervisedKeys\x12\r\n\x05input\x18\x01 \x01(\t\x12\x0e\n\x06output\x18\x02 \x01(\t\"%\n\x12RedistributionInfo\x12\x0f\n\x07license\x18\x01 \x01(\t\"\x99\x04\n\x0b\x44\x61tasetInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x0f\n\x07version\x18\t \x01(\t\x12\x10\n\x08\x63itation\x18\x03 \x01(\t\x12\x15\n\rsize_in_bytes\x18\x04 \x01(\x03\x12\x36\n\x08location\x18\x05 \x01(\x0b\x32$.tensorflow_datasets.DatasetLocation\x12W\n\x12\x64ownload_checksums\x18\n \x03(\x0b\x32\x37.tensorflow_datasets.DatasetInfo.DownloadChecksumsEntryB\x02\x18\x01\x12.\n\x06schema\x18\x06 \x01(\x0b\x32\x1e.tensorflow.metadata.v0.Schema\x12.\n\x06splits\x18\x07 \x03(\x0b\x32\x1e.tensorflow_datasets.SplitInfo\x12<\n\x0fsupervised_keys\x18\x08 \x01(\x0b\x32#.tensorflow_datasets.SupervisedKeys\x12\x44\n\x13redistribution_info\x18\x0b \x01(\x0b\x32\'.tensorflow_datasets.RedistributionInfo\x1a\x38\n\x16\x44ownloadChecksumsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x03\xf8\x01\x01\x62\x06proto3')
41+
serialized_pb=_b('\n\x12\x64\x61taset_info.proto\x12\x13tensorflow_datasets\x1a-tensorflow_metadata/proto/v0/statistics.proto\x1a)tensorflow_metadata/proto/v0/schema.proto\"\x1f\n\x0f\x44\x61tasetLocation\x12\x0c\n\x04urls\x18\x01 \x03(\t\"\x8a\x01\n\tSplitInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nnum_shards\x18\x02 \x01(\x03\x12\x15\n\rshard_lengths\x18\x04 \x03(\x03\x12\x44\n\nstatistics\x18\x03 \x01(\x0b\x32\x30.tensorflow.metadata.v0.DatasetFeatureStatistics\"/\n\x0eSupervisedKeys\x12\r\n\x05input\x18\x01 \x01(\t\x12\x0e\n\x06output\x18\x02 \x01(\t\"%\n\x12RedistributionInfo\x12\x0f\n\x07license\x18\x01 \x01(\t\"\x99\x04\n\x0b\x44\x61tasetInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x02 \x01(\t\x12\x0f\n\x07version\x18\t \x01(\t\x12\x10\n\x08\x63itation\x18\x03 \x01(\t\x12\x15\n\rsize_in_bytes\x18\x04 \x01(\x03\x12\x36\n\x08location\x18\x05 \x01(\x0b\x32$.tensorflow_datasets.DatasetLocation\x12W\n\x12\x64ownload_checksums\x18\n \x03(\x0b\x32\x37.tensorflow_datasets.DatasetInfo.DownloadChecksumsEntryB\x02\x18\x01\x12.\n\x06schema\x18\x06 \x01(\x0b\x32\x1e.tensorflow.metadata.v0.Schema\x12.\n\x06splits\x18\x07 \x03(\x0b\x32\x1e.tensorflow_datasets.SplitInfo\x12<\n\x0fsupervised_keys\x18\x08 \x01(\x0b\x32#.tensorflow_datasets.SupervisedKeys\x12\x44\n\x13redistribution_info\x18\x0b \x01(\x0b\x32\'.tensorflow_datasets.RedistributionInfo\x1a\x38\n\x16\x44ownloadChecksumsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\x42\x03\xf8\x01\x01\x62\x06proto3')
4242
,
4343
dependencies=[tensorflow__metadata_dot_proto_dot_v0_dot_statistics__pb2.DESCRIPTOR,tensorflow__metadata_dot_proto_dot_v0_dot_schema__pb2.DESCRIPTOR,])
4444

@@ -98,7 +98,14 @@
9898
is_extension=False, extension_scope=None,
9999
serialized_options=None, file=DESCRIPTOR),
100100
_descriptor.FieldDescriptor(
101-
name='statistics', full_name='tensorflow_datasets.SplitInfo.statistics', index=2,
101+
name='shard_lengths', full_name='tensorflow_datasets.SplitInfo.shard_lengths', index=2,
102+
number=4, type=3, cpp_type=2, label=3,
103+
has_default_value=False, default_value=[],
104+
message_type=None, enum_type=None, containing_type=None,
105+
is_extension=False, extension_scope=None,
106+
serialized_options=None, file=DESCRIPTOR),
107+
_descriptor.FieldDescriptor(
108+
name='statistics', full_name='tensorflow_datasets.SplitInfo.statistics', index=3,
102109
number=3, type=11, cpp_type=10, label=1,
103110
has_default_value=False, default_value=None,
104111
message_type=None, enum_type=None, containing_type=None,
@@ -116,8 +123,8 @@
116123
extension_ranges=[],
117124
oneofs=[
118125
],
119-
serialized_start=166,
120-
serialized_end=281,
126+
serialized_start=167,
127+
serialized_end=305,
121128
)
122129

123130

@@ -154,8 +161,8 @@
154161
extension_ranges=[],
155162
oneofs=[
156163
],
157-
serialized_start=283,
158-
serialized_end=330,
164+
serialized_start=307,
165+
serialized_end=354,
159166
)
160167

161168

@@ -185,8 +192,8 @@
185192
extension_ranges=[],
186193
oneofs=[
187194
],
188-
serialized_start=332,
189-
serialized_end=369,
195+
serialized_start=356,
196+
serialized_end=393,
190197
)
191198

192199

@@ -223,8 +230,8 @@
223230
extension_ranges=[],
224231
oneofs=[
225232
],
226-
serialized_start=853,
227-
serialized_end=909,
233+
serialized_start=877,
234+
serialized_end=933,
228235
)
229236

230237
_DATASETINFO = _descriptor.Descriptor(
@@ -323,8 +330,8 @@
323330
extension_ranges=[],
324331
oneofs=[
325332
],
326-
serialized_start=372,
327-
serialized_end=909,
333+
serialized_start=396,
334+
serialized_end=933,
328335
)
329336

330337
_SPLITINFO.fields_by_name['statistics'].message_type = tensorflow__metadata_dot_proto_dot_v0_dot_statistics__pb2._DATASETFEATURESTATISTICS

0 commit comments

Comments
 (0)