|
| 1 | +syntax = "proto3"; |
| 2 | + |
| 3 | +package tensorflow.data; |
| 4 | + |
// Represents the type of auto-sharding we enable.
//
// NOTE(review): the values are unprefixed (e.g. `AUTO` rather than
// `AUTO_SHARD_POLICY_AUTO`) and the zero value `AUTO` carries real meaning.
// Both deviate from current proto style guidance, but they are part of the
// published wire/JSON contract and cannot be renamed or renumbered now.
enum AutoShardPolicy {
  // AUTO: Attempts FILE-based sharding, falling back to DATA-based sharding.
  AUTO = 0;
  // FILE: Shards by input files (i.e. each worker will get a set of files to
  // process). When this option is selected, make sure that there are at least
  // as many files as workers. If there are fewer input files than workers, a
  // runtime error will be raised.
  FILE = 1;
  // DATA: Shards by elements produced by the dataset. Each worker will process
  // the whole dataset and discard the portion that is not for itself. Note
  // that for this mode to correctly partition the dataset elements, the
  // dataset needs to produce elements in a deterministic order.
  DATA = 2;
  // HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated
  // as a placeholder to replace with `shard(num_workers, worker_index)`.
  HINT = 3;
  // OFF: No sharding will be performed.
  OFF = -1;
}
| 25 | + |
// Options governing how a tf.data input pipeline is distributed across
// workers and devices.
message DistributeOptions {
  // The policy used to automatically shard the dataset across workers.
  // Unset is indistinguishable from AUTO (the zero value of AutoShardPolicy).
  AutoShardPolicy auto_shard_policy = 1;
  // The number of devices attached to this input pipeline.
  // Wrapped in a single-member oneof to give the scalar explicit presence
  // (distinguishing "unset" from an explicit 0), which plain proto3 scalars
  // do not have.
  oneof optional_num_devices {
    int32 num_devices = 2;
  }
}
| 33 | + |
// Options controlling vectorization of user-defined map transformations.
// Each scalar is wrapped in a single-member oneof so that "unset" can be
// distinguished from an explicit false (proto3 explicit presence idiom).
message MapVectorization {
  // Whether to vectorize map transformations.
  oneof optional_enabled {
    bool enabled = 1;
  }
  // Whether to use ChooseFastestBranchDataset with this transformation. If
  // True, the pipeline picks between the vectorized and original segment at
  // runtime based on their iteration speed.
  oneof optional_use_choose_fastest {
    bool use_choose_fastest = 2;
  }
}
| 46 | + |
// Options controlling the static (graph-rewrite) and dynamic (autotuning)
// optimizations applied to a tf.data input pipeline. Every scalar knob is
// wrapped in a single-member oneof so that "unset" (use the runtime default)
// is distinguishable from an explicitly chosen value — plain proto3 scalars
// have only implicit presence.
message OptimizationOptions {
  // Whether to apply default graph optimizations. If False, only graph
  // optimizations that have been explicitly enabled will be applied.
  oneof optional_apply_default_optimizations {
    bool apply_default_optimizations = 1;
  }
  // Whether to automatically tune performance knobs.
  oneof optional_autotune {
    bool autotune = 2;
  }
  // When autotuning is enabled (through autotune), determines whether to also
  // autotune buffer sizes for datasets with parallelism.
  oneof optional_autotune_buffers {
    bool autotune_buffers = 3;
  }
  // When autotuning is enabled (through autotune), determines the CPU budget
  // to use, in number of cores. Values greater than the number of schedulable
  // CPU cores are allowed but may result in CPU contention.
  oneof optional_autotune_cpu_budget {
    int32 autotune_cpu_budget = 4;
  }
  // When autotuning is enabled (through autotune), determines the RAM budget
  // to use, in bytes. Values greater than the available RAM in bytes may
  // result in OOM. If 0, defaults to half of the available RAM in bytes.
  //
  // Declared as int64 because the budget is a byte count: int32 would cap it
  // at ~2 GiB, below the RAM of typical machines. int32 -> int64 is a
  // wire-compatible widening (both are varint-encoded with the same field
  // number).
  oneof optional_autotune_ram_budget {
    int64 autotune_ram_budget = 5;
  }
  // Whether to fuse filter transformations.
  oneof optional_filter_fusion {
    bool filter_fusion = 6;
  }
  // Whether to fuse filter dataset that predicts random_uniform < rate into a
  // sampling dataset.
  oneof optional_filter_with_random_uniform_fusion {
    bool filter_with_random_uniform_fusion = 7;
  }
  // Whether to hoist tf.random_uniform() ops out of map transformations.
  oneof optional_hoist_random_uniform {
    bool hoist_random_uniform = 8;
  }
  // Whether to fuse map and batch transformations.
  oneof optional_map_and_batch_fusion {
    bool map_and_batch_fusion = 9;
  }
  // Whether to fuse map and filter transformations.
  oneof optional_map_and_filter_fusion {
    bool map_and_filter_fusion = 10;
  }
  // Whether to fuse map transformations.
  oneof optional_map_fusion {
    bool map_fusion = 11;
  }
  // Whether to parallelize stateless map transformations.
  oneof optional_map_parallelization {
    bool map_parallelization = 12;
  }
  // The map vectorization options associated with the dataset.
  MapVectorization map_vectorization = 13;
  // Whether to eliminate no-op transformations.
  oneof optional_noop_elimination {
    bool noop_elimination = 14;
  }
  // Whether to parallelize copying of batch elements. This optimization is
  // highly experimental and can cause performance degradation (e.g. when the
  // parallelization overhead exceeds the benefits of performing the data
  // copies in parallel). You should only enable this optimization if a) your
  // input pipeline is bottlenecked on batching and b) you have validated that
  // this optimization improves performance.
  oneof optional_parallel_batch {
    bool parallel_batch = 15;
  }
  // Whether to reorder ops that will discard data to the front of unary
  // cardinality preserving transformations, e.g. dataset.map(...).take(3) will
  // be optimized to dataset.take(3).map(...). For now this optimization will
  // move `skip`, `shard` and `take` to the front of `map` and `prefetch`. This
  // optimization is only for performance; it will not affect the output of the
  // dataset.
  oneof optional_reorder_data_discarding_ops {
    bool reorder_data_discarding_ops = 16;
  }
  // Whether to fuse shuffle and repeat transformations.
  oneof optional_shuffle_and_repeat_fusion {
    bool shuffle_and_repeat_fusion = 17;
  }
}
| 132 | + |
// Options controlling the threading behavior of a tf.data input pipeline.
// Scalars are wrapped in single-member oneofs so that "unset" (use the
// runtime default) is distinguishable from an explicit value.
message ThreadingOptions {
  // If set, it overrides the maximum degree of intra-op parallelism.
  oneof optional_max_intra_op_parallelism {
    int32 max_intra_op_parallelism = 1;
  }
  // If set, the dataset will use a private threadpool of the given size
  // (number of threads).
  oneof optional_private_threadpool_size {
    int32 private_threadpool_size = 2;
  }
}
| 143 | + |
// Represents how to handle external state during serialization (e.g. when
// serializing a dataset or checkpointing its iterator).
//
// NOTE(review): the zero value carries real meaning (WARN is the effective
// default when the field is unset) rather than being `..._UNSPECIFIED`;
// this is locked in by wire compatibility and cannot be changed now.
enum ExternalStatePolicy {
  // External state is ignored and a warning is logged.
  POLICY_WARN = 0;
  // External state is ignored without a warning.
  POLICY_IGNORE = 1;
  // External state results in an error.
  POLICY_FAIL = 2;
}
| 150 | + |
// Message stored with Dataset objects to control how datasets are processed
// and optimized.
//
// Scalar fields are wrapped in single-member oneofs to give them explicit
// presence: "unset" (defer to the runtime default) is distinguishable from an
// explicitly chosen value.
message Options {
  // Whether the outputs need to be produced in deterministic order.
  oneof optional_deterministic {
    bool deterministic = 1;
  }
  // The distribution strategy options associated with the dataset.
  DistributeOptions distribute_options = 2;
  // The optimization options associated with the dataset.
  OptimizationOptions optimization_options = 3;
  // Whether to introduce 'slack' in the last `prefetch` of the input pipeline,
  // if it exists. This may reduce CPU contention with accelerator host-side
  // activity at the start of a step. The slack frequency is determined by the
  // number of devices attached to this input pipeline.
  oneof optional_slack {
    bool slack = 4;
  }
  // The threading options associated with the dataset.
  ThreadingOptions threading_options = 5;
  // This option can be used to override the default policy for how to handle
  // external state when serializing a dataset or checkpointing its iterator.
  // There are three settings available - IGNORE: External state is ignored
  // without a warning; WARN: External state is ignored and a warning is
  // logged; FAIL: External state results in an error.
  oneof optional_external_state_policy {
    ExternalStatePolicy external_state_policy = 6;
  }
}