From 46809a52e1b9b710cae71b70704a44a1be4aef0a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 13:36:35 +0200 Subject: [PATCH 01/10] add timedelta64 data type --- data-types/timedelta64/README.md | 94 ++++++++++++++++++++++++++++++ data-types/timedelta64/schema.json | 29 +++++++++ 2 files changed, 123 insertions(+) create mode 100644 data-types/timedelta64/README.md create mode 100644 data-types/timedelta64/schema.json diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md new file mode 100644 index 0000000..10d7228 --- /dev/null +++ b/data-types/timedelta64/README.md @@ -0,0 +1,94 @@ +# timedelta64 data type + +Defines a Zarr data type to model the `timedelta64` data type defined by Numpy. + +## Background + +`timedelta64` is based on a data type defined in [Numpy](https://numpy.org/). Thus this document begins by describing how `timedelta64` works in Numpy. Numpy's implementation is necessary context for making sense of the Zarr implementation. +The following references to Numpy are current with version 2.2 of that library. + +Numpy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. Numpy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [Numpy documentation](https://numpy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. + +`timedelta64` data types are parametrized by a physical unit of duration, like seconds or minutes, and a positive integral step size. For example, given a `timedelta64` data type defined with a unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of 10 seconds. + +Numpy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. The smallest 64 bit signed integer, i.e. 
`-2^63`, is reserved to represent a non-duration value called "Not a Time", or `NaT`. The `NaT` value serves a role similar to the "Not a Number" value used floating point data types. + +### Numpy data type parameters + +#### Step size +The Numpy `timedelta64` data type takes a step size parameter. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. + +While it is possible to construct a Numpy `timedelta64` data type with a step size of `0`, Numpy will not permit this data type to be associated with arrays that contain non-`NaT` scalars. Thus for arrays that contain actual durations, the smallest step size is effectively `1`. + +#### Unit +The Numpy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: + +| Identifier | Meaning | +|------------|----------| +| Y | year | +| M | month | +| W | week | +| D | day | +| h | hour | +| m | minute | +| s | second | +| ms | millisecond | +| us | microsecond | +| μs | microsecond | +| ns | nanosecond | +| ps | picosecond | +| fs | femtosecond | +| as | attosecond | + +> Note: "us" and "μs" are both valid representations for microseconds. + +> Note: Numpy permits the creation of `timedelta64` data types with an unspecified unit. In this case, the unit is set to the special value `"generic"`. + +#### Endianness +The Numpy `timedelta64` data type takes an byte order parameter, which must be either little-endian or big-endian. + +## Data type representation + +### Name + +The name of this data type is the string `"timedelta64"`. 
+ +### Configuration + +The configuration for this data type is a JSON object with the following fields: + +| field name | type | required | notes | +|------------|----------|---|---| +| `"unit"` | one of: `"Y"`, `"M"` , `"W"`, `"D"` , `"h"` , `"m"` , `"s"` , `"ms"` , `"us"` , `"μs"` , `"ns"` , `"ps"` , `"fs"` , `"as"`, `"generic"` | yes | None | +| `"scale_factor"` | `integer` | yes | The number must represent an integer from the inclusive range `[1, 2147483647]` | + +> Note: the Numpy `timedelta64` data type is parametrized by an endianness (little or big), but the Zarr `timedelta64` data type is not. In Zarr, the endianness of `timedelta64` arrays is determined by the configuration of the `codecs` metadata and is thus not part of the data type configuration. + +> Note: as per Numpy, `"us"` and `"μs"` are equivalent and interchangeable representations of microseconds. + +No additional fields are permitted. + +### Examples +The metadata representation of a `timedelta64` with a unit of microseconds and a scale factor of 10, equivalent to the Numpy data type `timedelta64[10us]`: + +```json +{ + "name": "timedelta64", + "configuration": { + "unit": "us", + "scale_factor": 10 + } +} +``` + +## Fill value representation + +`timedelta64` fill values are represented as one of: +- a JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. +- the string `"NaT"`, which denotes the value `NaT`. + +> Note: the `NaT` value can also be encoded as the JSON number `-9223372036854775808`, i.e. `-2 ^ 63`. + +## Codec compatibility + +This data type is compatible with any codec that supports arrays of signed 64 bit integers. 
diff --git a/data-types/timedelta64/schema.json b/data-types/timedelta64/schema.json new file mode 100644 index 0000000..86cfc1f --- /dev/null +++ b/data-types/timedelta64/schema.json @@ -0,0 +1,29 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "timedelta64", + "type": "object", + "properties": { + "name": { + "const": "timedelta64" + }, + "configuration": { + "type": "object", + "properties": { + "unit": { + "type": "string", + "enum": ["Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", "generic"] + }, + "scale_factor": { + "type": "integer", + "minimum": 1, + "maximum": 2147483647 + + } + }, + "required": ["unit", "scale_factor"], + "additionalProperties": false + } + }, + "required": ["name", "configuration"], + "additionalProperties": false +} From 30f7e460e2f34da99cc08f93140b101b6e0f764b Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 13:45:36 +0200 Subject: [PATCH 02/10] clarify step size lower bound --- data-types/timedelta64/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index 10d7228..3b38282 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -18,7 +18,7 @@ Numpy represents `timedelta64` scalars with 64 bit signed integers. Negative val #### Step size The Numpy `timedelta64` data type takes a step size parameter. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. -While it is possible to construct a Numpy `timedelta64` data type with a step size of `0`, Numpy will not permit this data type to be associated with arrays that contain non-`NaT` scalars. Thus for arrays that contain actual durations, the smallest step size is effectively `1`. +While it is possible to construct a Numpy `timedelta64` data type with a step size of `0`, Numpy will internally normalize this to `1`. 
#### Unit The Numpy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: From 94422888d2a71cfc20a71a93a4880d35750f9b2e Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 13:56:23 +0200 Subject: [PATCH 03/10] prose --- data-types/timedelta64/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index 3b38282..ac04203 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -4,7 +4,7 @@ Defines a Zarr data type to model the `timedelta64` data type defined by Numpy. ## Background -`timedelta64` is based on a data type defined in [Numpy](https://numpy.org/). Thus this document begins by describing how `timedelta64` works in Numpy. Numpy's implementation is necessary context for making sense of the Zarr implementation. +`timedelta64` is based on a data type with the same name defined in [Numpy](https://numpy.org/). Thus this document begins by describing how `timedelta64` works in Numpy. Numpy's implementation is necessary context for making sense of the Zarr implementation. The following references to Numpy are current with version 2.2 of that library. Numpy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. Numpy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [Numpy documentation](https://numpy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. 
From 62797478639fd0f0558a6e7be219f645c52c8494 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 14:07:30 +0200 Subject: [PATCH 04/10] prose --- data-types/timedelta64/README.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index ac04203..aa4e3f4 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -1,10 +1,11 @@ # timedelta64 data type -Defines a Zarr data type to model the `timedelta64` data type defined by Numpy. +This document defines a Zarr data type to model the `timedelta64` data type from Numpy. The `timedelta64` data type represents signed temporal durations. ## Background -`timedelta64` is based on a data type with the same name defined in [Numpy](https://numpy.org/). Thus this document begins by describing how `timedelta64` works in Numpy. Numpy's implementation is necessary context for making sense of the Zarr implementation. +`timedelta64` is based on a data type with the same name defined in [Numpy](https://numpy.org/). To provide necessary context, this document first describes how `timedelta64` works in Numpy before detailing its specification in Zarr. + The following references to Numpy are current with version 2.2 of that library. Numpy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. Numpy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [Numpy documentation](https://numpy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. @@ -18,7 +19,7 @@ Numpy represents `timedelta64` scalars with 64 bit signed integers. Negative val #### Step size The Numpy `timedelta64` data type takes a step size parameter. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. 
-While it is possible to construct a Numpy `timedelta64` data type with a step size of `0`, Numpy will internally normalize this to `1`. +While it is possible to construct a Numpy `timedelta64` data type with a step size of `0`, Numpy will automatically normalize this to `1`. #### Unit The Numpy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: @@ -40,12 +41,12 @@ The Numpy `timedelta64` data type takes a unit parameter, which must be one of t | fs | femtosecond | | as | attosecond | -> Note: "us" and "μs" are both valid representations for microseconds. +> Note: "us" and "μs" are treated as equivalent by Numpy. > Note: Numpy permits the creation of `timedelta64` data types with an unspecified unit. In this case, the unit is set to the special value `"generic"`. #### Endianness -The Numpy `timedelta64` data type takes an byte order parameter, which must be either little-endian or big-endian. +The Numpy `timedelta64` data type takes a byte order parameter, which must be either little-endian or big-endian. ## Data type representation @@ -55,7 +56,7 @@ The name of this data type is the string `"timedelta64"`. ### Configuration -The configuration for this data type is a JSON object with the following fields: +This data type requires a configuration. The configuration for this data type is a JSON object with the following fields: | field name | type | required | notes | |------------|----------|---|---| @@ -69,7 +70,7 @@ The configuration for this data type is a JSON object with the following fields: No additional fields are permitted. ### Examples -The metadata representation of a `timedelta64` with a unit of microseconds and a scale factor of 10, equivalent to the Numpy data type `timedelta64[10us]`: +The following is an example of the metadata representation of a `timedelta64` with a unit of microseconds and a scale factor of 10. 
This configuration defines a data type equivalent to the Numpy data type `timedelta64[10us]`: ```json { @@ -87,7 +88,7 @@ The metadata representation of a `timedelta64` with a unit of microseconds and a - a JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. - the string `"NaT"`, which denotes the value `NaT`. -> Note: the `NaT` value can also be encoded as the JSON number `-9223372036854775808`, i.e. `-2 ^ 63`. +> Note: the `NaT` value MAY be encoded as the JSON number `-9223372036854775808`, i.e. `-2 ^ 63`. ## Codec compatibility From 7d29fb8c9df4eb16d9578a04f864522ba533fff4 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 14:13:39 +0200 Subject: [PATCH 05/10] lint and prose and typos --- data-types/timedelta64/README.md | 36 +++++++++++++++--------------- data-types/timedelta64/schema.json | 1 - 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index aa4e3f4..d7fbce3 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -1,28 +1,28 @@ # timedelta64 data type -This document defines a Zarr data type to model the `timedelta64` data type from Numpy. The `timedelta64` data type represents signed temporal durations. +This document defines a Zarr data type to model the `timedelta64` data type from NumPy. The `timedelta64` data type represents signed temporal durations. ## Background -`timedelta64` is based on a data type with the same name defined in [Numpy](https://numpy.org/). To provide necessary context, this document first describes how `timedelta64` works in Numpy before detailing its specification in Zarr. +`timedelta64` is based on a data type with the same name defined in [NumPy](https://NumPy.org/). To provide necessary context, this document first describes how `timedelta64` works in NumPy before detailing its specification in Zarr. 
-The following references to Numpy are current with version 2.2 of that library. +The following references to NumPy are based on version 2.2 of that library. -Numpy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. Numpy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [Numpy documentation](https://numpy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. +NumPy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. NumPy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [NumPy documentation](https://NumPy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. `timedelta64` data types are parametrized by a physical unit of duration, like seconds or minutes, and a positive integral step size. For example, given a `timedelta64` data type defined with a unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of 10 seconds. -Numpy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. The smallest 64 bit signed integer, i.e. `-2^63`, is reserved to represent a non-duration value called "Not a Time", or `NaT`. The `NaT` value serves a role similar to the "Not a Number" value used floating point data types. +NumPy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. The smallest 64-bit signed integer, i.e., `-2^63`, represents a non-duration value called "Not a Time", or `NaT`. The `NaT` value serves a role similar to the "Not a Number" value used floating point data types. 
-### Numpy data type parameters +### NumPy data type parameters #### Step size -The Numpy `timedelta64` data type takes a step size parameter. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. +The NumPy `timedelta64` data type takes a step size parameter. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. -While it is possible to construct a Numpy `timedelta64` data type with a step size of `0`, Numpy will automatically normalize this to `1`. +While it is possible to construct a NumPy `timedelta64` data type with a step size of `0`, NumPy will automatically normalize this to `1`. #### Unit -The Numpy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: +The NumPy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: | Identifier | Meaning | |------------|----------| @@ -41,12 +41,12 @@ The Numpy `timedelta64` data type takes a unit parameter, which must be one of t | fs | femtosecond | | as | attosecond | -> Note: "us" and "μs" are treated as equivalent by Numpy. +> Note: "us" and "μs" are treated as equivalent by NumPy. -> Note: Numpy permits the creation of `timedelta64` data types with an unspecified unit. In this case, the unit is set to the special value `"generic"`. +> Note: NumPy permits the creation of `timedelta64` data types with an unspecified unit. In this case, the unit is set to the special value `"generic"`. #### Endianness -The Numpy `timedelta64` data type takes a byte order parameter, which must be either little-endian or big-endian. +The NumPy `timedelta64` data type takes a byte order parameter, which must be either little-endian or big-endian. ## Data type representation @@ -63,14 +63,14 @@ This data type requires a configuration. 
The configuration for this data type is | `"unit"` | one of: `"Y"`, `"M"` , `"W"`, `"D"` , `"h"` , `"m"` , `"s"` , `"ms"` , `"us"` , `"μs"` , `"ns"` , `"ps"` , `"fs"` , `"as"`, `"generic"` | yes | None | | `"scale_factor"` | `integer` | yes | The number must represent an integer from the inclusive range `[1, 2147483647]` | -> Note: the Numpy `timedelta64` data type is parametrized by an endianness (little or big), but the Zarr `timedelta64` data type is not. In Zarr, the endianness of `timedelta64` arrays is determined by the configuration of the `codecs` metadata and is thus not part of the data type configuration. +> Note: the NumPy `timedelta64` data type is parametrized by an endianness (little or big), but the Zarr `timedelta64` data type is not. In Zarr, the endianness of `timedelta64` arrays is determined by the configuration of the `codecs` metadata and is thus not part of the data type configuration. -> Note: as per Numpy, `"us"` and `"μs"` are equivalent and interchangeable representations of microseconds. +> Note: as per NumPy, `"us"` and `"μs"` are equivalent and interchangeable representations of microseconds. -No additional fields are permitted. +No additional fields are permitted in the configuration. ### Examples -The following is an example of the metadata representation of a `timedelta64` with a unit of microseconds and a scale factor of 10. This configuration defines a data type equivalent to the Numpy data type `timedelta64[10us]`: +The following is an example of the metadata for a `timedelta64` data type with a unit of microseconds and a scale factor of 10. This configuration defines a data type equivalent to the NumPy data type `timedelta64[10us]`: ```json { @@ -88,8 +88,8 @@ The following is an example of the metadata representation of a `timedelta64` wi - a JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. - the string `"NaT"`, which denotes the value `NaT`. 
-> Note: the `NaT` value MAY be encoded as the JSON number `-9223372036854775808`, i.e. `-2 ^ 63`. +> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., `-2^63`. ## Codec compatibility -This data type is compatible with any codec that supports arrays of signed 64 bit integers. +This data type is compatible with any codec that supports arrays of signed 64-bit integers. diff --git a/data-types/timedelta64/schema.json b/data-types/timedelta64/schema.json index 86cfc1f..2dd22ed 100644 --- a/data-types/timedelta64/schema.json +++ b/data-types/timedelta64/schema.json @@ -17,7 +17,6 @@ "type": "integer", "minimum": 1, "maximum": 2147483647 - } }, "required": ["unit", "scale_factor"], From 0d708def6673119d9c079050a3444972ea9192cc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 14:52:05 +0200 Subject: [PATCH 06/10] use scale factor consistently --- data-types/timedelta64/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index d7fbce3..1104e40 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -10,16 +10,16 @@ The following references to NumPy are based on version 2.2 of that library. NumPy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. NumPy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [NumPy documentation](https://NumPy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. -`timedelta64` data types are parametrized by a physical unit of duration, like seconds or minutes, and a positive integral step size. 
For example, given a `timedelta64` data type defined with a unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of 10 seconds. +`timedelta64` data types are parametrized by a physical unit of duration, like seconds or minutes, and a positive integral scale factor. For example, given a `timedelta64` data type defined with a unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of 10 seconds. NumPy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. The smallest 64-bit signed integer, i.e., `-2^63`, represents a non-duration value called "Not a Time", or `NaT`. The `NaT` value serves a role similar to the "Not a Number" value used floating point data types. ### NumPy data type parameters -#### Step size -The NumPy `timedelta64` data type takes a step size parameter. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. +#### Scale factor +The NumPy `timedelta64` data type takes a scaling factor. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. -While it is possible to construct a NumPy `timedelta64` data type with a step size of `0`, NumPy will automatically normalize this to `1`. +While it is possible to construct a NumPy `timedelta64` data type with a scaling factor of `0`, NumPy will automatically normalize this to `1`. 
#### Unit The NumPy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: From deedf6ac1a60890f90f01be2bd3741c5e9ca93f9 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 15:49:52 +0200 Subject: [PATCH 07/10] update fill value section --- data-types/timedelta64/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index 1104e40..b74f42f 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -84,11 +84,11 @@ The following is an example of the metadata for a `timedelta64` data type with a ## Fill value representation -`timedelta64` fill values are represented as one of: -- a JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. -- the string `"NaT"`, which denotes the value `NaT`. +For the `"fill_value"` field of array metadata, `timedelta64` scalars must be represented in one of two forms: +- As JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. +- As the string `"NaT"`, which denotes the value `NaT`. -> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., `-2^63`. +> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., `-2^63`. That is, `"fill_value": "NaT"` and `"fill_value": -9223372036854775808` should be treated as equivalent. 
## Codec compatibility From e2676ad243827491fca3868df923989ed19c0d79 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 17:20:44 +0200 Subject: [PATCH 08/10] reflow text --- data-types/timedelta64/README.md | 60 +++++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index b74f42f..f5a6665 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -1,28 +1,44 @@ # timedelta64 data type -This document defines a Zarr data type to model the `timedelta64` data type from NumPy. The `timedelta64` data type represents signed temporal durations. +This document defines a Zarr data type to model the `timedelta64` data type from NumPy. +The `timedelta64` data type represents signed temporal durations. ## Background -`timedelta64` is based on a data type with the same name defined in [NumPy](https://NumPy.org/). To provide necessary context, this document first describes how `timedelta64` works in NumPy before detailing its specification in Zarr. +`timedelta64` is based on a data type with the same name defined in [NumPy](https://NumPy.org/). +To provide necessary context, this document first describes how `timedelta64` works in NumPy before +detailing its specification in Zarr. The following references to NumPy are based on version 2.2 of that library. -NumPy defines a data type called `"timedelta64"` to represent signed temporal durations. These durations arise when taking a difference between moments in time. NumPy models moments in time with a related data type called `"datetime64"`. Both data types are described in the [NumPy documentation](https://NumPy.org/doc/stable/reference/arrays.datetime.html), which should be considered authoritative. +NumPy defines a data type called `"timedelta64"` to represent signed temporal durations. +These durations arise when taking a difference between moments in time. 
+NumPy models moments in time with a related data type called `"datetime64"`. +Both data types are described in the [NumPy documentation](https://NumPy.org/doc/stable/reference/arrays.datetime.html), +which should be considered authoritative. -`timedelta64` data types are parametrized by a physical unit of duration, like seconds or minutes, and a positive integral scale factor. For example, given a `timedelta64` data type defined with a unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of 10 seconds. +`timedelta64` data types are parametrized by a physical unit of duration, like seconds or minutes, +and a positive integral scale factor. For example, given a `timedelta64` data type defined with a +unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of +10 seconds. -NumPy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. The smallest 64-bit signed integer, i.e., `-2^63`, represents a non-duration value called "Not a Time", or `NaT`. The `NaT` value serves a role similar to the "Not a Number" value used floating point data types. +NumPy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. +The smallest 64-bit signed integer, i.e., `-2^63`, represents a non-duration value called +"Not a Time", or `NaT`. The `NaT` value serves a role similar to the "Not a Number" value defined in +some floating point data types. ### NumPy data type parameters #### Scale factor -The NumPy `timedelta64` data type takes a scaling factor. It must be an integer in the range `[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. +The NumPy `timedelta64` data type takes a scaling factor. It must be an integer in the range +`[1, 2147483647]`, i.e. `[1, 2^31 - 1]`. -While it is possible to construct a NumPy `timedelta64` data type with a scaling factor of `0`, NumPy will automatically normalize this to `1`. 
+While it is possible to construct a NumPy `timedelta64` data type with a scaling factor of `0`, +NumPy will automatically normalize this to `1`. #### Unit -The NumPy `timedelta64` data type takes a unit parameter, which must be one of the following temporal units: +The NumPy `timedelta64` data type takes a unit parameter, which must be one of the following +temporal units: | Identifier | Meaning | |------------|----------| @@ -43,10 +59,13 @@ The NumPy `timedelta64` data type takes a unit parameter, which must be one of t > Note: "us" and "μs" are treated as equivalent by NumPy. -> Note: NumPy permits the creation of `timedelta64` data types with an unspecified unit. In this case, the unit is set to the special value `"generic"`. +> Note: NumPy permits the creation of `timedelta64` data types with an unspecified unit. In this +case, the unit is set to the special value `"generic"`. #### Endianness -The NumPy `timedelta64` data type takes a byte order parameter, which must be either little-endian or big-endian. + +The NumPy `timedelta64` data type takes a byte order parameter, which must be either little-endian +or big-endian. ## Data type representation @@ -56,21 +75,27 @@ The name of this data type is the string `"timedelta64"`. ### Configuration -This data type requires a configuration. The configuration for this data type is a JSON object with the following fields: +This data type requires a configuration. 
The configuration for this data type is a JSON object with +the following fields: | field name | type | required | notes | |------------|----------|---|---| | `"unit"` | one of: `"Y"`, `"M"` , `"W"`, `"D"` , `"h"` , `"m"` , `"s"` , `"ms"` , `"us"` , `"μs"` , `"ns"` , `"ps"` , `"fs"` , `"as"`, `"generic"` | yes | None | | `"scale_factor"` | `integer` | yes | The number must represent an integer from the inclusive range `[1, 2147483647]` | -> Note: the NumPy `timedelta64` data type is parametrized by an endianness (little or big), but the Zarr `timedelta64` data type is not. In Zarr, the endianness of `timedelta64` arrays is determined by the configuration of the `codecs` metadata and is thus not part of the data type configuration. +> Note: the NumPy `timedelta64` data type is parametrized by an endianness (little or big), but the +Zarr `timedelta64` data type is not. In Zarr, the endianness of `timedelta64` arrays is determined +by the configuration of the `codecs` metadata and is thus not part of the data type configuration. -> Note: as per NumPy, `"us"` and `"μs"` are equivalent and interchangeable representations of microseconds. +> Note: as per NumPy, `"us"` and `"μs"` are equivalent and interchangeable representations of +microseconds. No additional fields are permitted in the configuration. ### Examples -The following is an example of the metadata for a `timedelta64` data type with a unit of microseconds and a scale factor of 10. This configuration defines a data type equivalent to the NumPy data type `timedelta64[10us]`: +The following is an example of the metadata for a `timedelta64` data type with a unit of +microseconds and a scale factor of 10. 
This configuration defines a data type equivalent to the +NumPy data type `timedelta64[10us]`: ```json { @@ -84,11 +109,14 @@ The following is an example of the metadata for a `timedelta64` data type with a ## Fill value representation -For the `"fill_value"` field of array metadata, `timedelta64` scalars must be represented in one of two forms: +For the `"fill_value"` field of array metadata, `timedelta64` scalars must be represented in one of +two forms: - As JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. - As the string `"NaT"`, which denotes the value `NaT`. -> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., `-2^63`. That is, `"fill_value": "NaT"` and `"fill_value": -9223372036854775808` should be treated as equivalent. +> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., +`-2^63`. That is, `"fill_value": "NaT"` and `"fill_value": -9223372036854775808` should be treated +as equivalent representations of the same scalar value (`NaT`). ## Codec compatibility From 967f4c2896f7c85ba58aba0ba8858025710dafab Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Tue, 6 May 2025 17:28:22 +0200 Subject: [PATCH 09/10] fix typo --- data-types/timedelta64/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-types/timedelta64/README.md b/data-types/timedelta64/README.md index f5a6665..be1fa7f 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/timedelta64/README.md @@ -111,7 +111,7 @@ NumPy data type `timedelta64[10us]`: For the `"fill_value"` field of array metadata, `timedelta64` scalars must be represented in one of two forms: -- As JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. +- As a JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. - As the string `"NaT"`, which denotes the value `NaT`. 
> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., From c8432bfb08c7195c235866e398a1d716ae117f21 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 9 May 2025 17:12:57 +0200 Subject: [PATCH 10/10] use numpy prefix --- .../README.md | 20 +++++++++---------- .../schema.json | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) rename data-types/{timedelta64 => numpy.timedelta64}/README.md (83%) rename data-types/{timedelta64 => numpy.timedelta64}/schema.json (95%) diff --git a/data-types/timedelta64/README.md b/data-types/numpy.timedelta64/README.md similarity index 83% rename from data-types/timedelta64/README.md rename to data-types/numpy.timedelta64/README.md index be1fa7f..0d4aec0 100644 --- a/data-types/timedelta64/README.md +++ b/data-types/numpy.timedelta64/README.md @@ -1,11 +1,11 @@ -# timedelta64 data type +# numpy.timedelta64 data type This document defines a Zarr data type to model the `timedelta64` data type from NumPy. The `timedelta64` data type represents signed temporal durations. ## Background -`timedelta64` is based on a data type with the same name defined in [NumPy](https://NumPy.org/). +`numpy.timedelta64` is based on the `timedelta64` data type defined in [NumPy](https://numpy.org/). To provide necessary context, this document first describes how `timedelta64` works in NumPy before detailing its specification in Zarr. @@ -22,7 +22,7 @@ and a positive integral scale factor. For example, given a `timedelta64` data ty unit of seconds and a duration 10, the scalar value `1` in that data type represents a duration of 10 seconds. -NumPy represents `timedelta64` scalars with 64 bit signed integers. Negative values are permitted. +NumPy represents `timedelta64` scalars with 64-bit signed integers. Negative values are permitted. The smallest 64-bit signed integer, i.e., `-2^63`, represents a non-duration value called "Not a Time", or `NaT`.
The `NaT` value serves a role similar to the "Not a Number" value defined in some floating point data types. @@ -71,7 +71,7 @@ or big-endian. ### Name -The name of this data type is the string `"timedelta64"`. +The name of this data type is the string `"numpy.timedelta64"`. ### Configuration @@ -84,8 +84,8 @@ the following fields: | `"scale_factor"` | `integer` | yes | The number must represent an integer from the inclusive range `[1, 2147483647]` | > Note: the NumPy `timedelta64` data type is parametrized by an endianness (little or big), but the -Zarr `timedelta64` data type is not. In Zarr, the endianness of `timedelta64` arrays is determined -by the configuration of the `codecs` metadata and is thus not part of the data type configuration. +Zarr `numpy.timedelta64` data type is not. In Zarr, the endianness of `numpy.timedelta64` arrays is determined +by the configuration of the codecs defined in the array metadata and is thus not part of the data type configuration. > Note: as per NumPy, `"us"` and `"μs"` are equivalent and interchangeable representations of microseconds. @@ -93,13 +93,13 @@ microseconds. No additional fields are permitted in the configuration. ### Examples -The following is an example of the metadata for a `timedelta64` data type with a unit of +The following is an example of the metadata for a `numpy.timedelta64` data type with a unit of microseconds and a scale factor of 10.
This configuration defines a data type equivalent to the NumPy data type `timedelta64[10us]`: ```json { - "name": "timedelta64", + "name": "numpy.timedelta64", "configuration": { "unit": "us", "scale_factor": 10 @@ -109,12 +109,12 @@ NumPy data type `timedelta64[10us]`: ## Fill value representation -For the `"fill_value"` field of array metadata, `timedelta64` scalars must be represented in one of +For the `"fill_value"` field of array metadata, `numpy.timedelta64` scalars must be represented in one of two forms: - As a JSON number with no fraction or exponent part that is within the range `[-2^63, 2^63 - 1]`. - As the string `"NaT"`, which denotes the value `NaT`. -> Note: the `NaT` value may optionally be encoded as the JSON number `-9223372036854775808`, i.e., +> Note: the `NaT` value may be encoded as the JSON number `-9223372036854775808`, i.e., `-2^63`. That is, `"fill_value": "NaT"` and `"fill_value": -9223372036854775808` should be treated as equivalent representations of the same scalar value (`NaT`). diff --git a/data-types/timedelta64/schema.json b/data-types/numpy.timedelta64/schema.json similarity index 95% rename from data-types/timedelta64/schema.json rename to data-types/numpy.timedelta64/schema.json index 2dd22ed..c2edcb1 100644 --- a/data-types/timedelta64/schema.json +++ b/data-types/numpy.timedelta64/schema.json @@ -4,7 +4,7 @@ "type": "object", "properties": { "name": { - "const": "timedelta64" + "const": "numpy.timedelta64" }, "configuration": { "type": "object",