From a455d3a125571250323edccea5c505b0d31c850b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:05:18 +0000 Subject: [PATCH 1/9] Update from https://github.com/glideapps/glide/commit/e5ba6c5153f53d35a52acad6eee6b65ad3328491 --- openapi/swagger.json | 105 ++++++++----------------------------------- 1 file changed, 18 insertions(+), 87 deletions(-) diff --git a/openapi/swagger.json b/openapi/swagger.json index 32a81d3..1adb1dd 100644 --- a/openapi/swagger.json +++ b/openapi/swagger.json @@ -367,7 +367,8 @@ "properties": { "$stashID": { "type": "string", - "description": "ID of the stash whose data should be used", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,255}$", + "description": "ID of the stash, e.g., `20240215-job32`", "example": "20240215-job32" } }, @@ -695,7 +696,8 @@ "properties": { "$stashID": { "type": "string", - "description": "ID of the stash whose data should be used", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,255}$", + "description": "ID of the stash, e.g., `20240215-job32`", "example": "20240215-job32" } }, @@ -924,7 +926,8 @@ "properties": { "$stashID": { "type": "string", - "description": "ID of the stash whose data should be used", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,255}$", + "description": "ID of the stash, e.g., `20240215-job32`", "example": "20240215-job32" } }, @@ -941,87 +944,11 @@ } } }, - "/stashes": { - "post": { - "responses": { - "200": { - "description": "", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "data": { - "type": "object", - "properties": { - "stashID": { - "type": "string", - "description": "The newly created stash" - } - }, - "required": [ - "stashID" - ], - "additionalProperties": false - } - }, - "required": [ - "data" - ], - "additionalProperties": false - } - } - } - }, - "400": { - "description": "", - "content": { - "application/json": { - "schema": { - "type": "object", - 
"properties": { - "error": { - "type": "object", - "properties": { - "type": { - "type": "string" - }, - "message": { - "type": "string" - } - }, - "required": [ - "type", - "message" - ], - "additionalProperties": false - } - }, - "required": [ - "error" - ], - "additionalProperties": false - } - } - } - } - }, - "operationId": "Create stash", - "requestBody": { - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": {}, - "additionalProperties": false - } - } - } - } - } - }, "/stashes/{stashID}/{serial}": { "post": { + "responses": {} + }, + "put": { "responses": { "200": { "description": "", @@ -1069,14 +996,15 @@ } } }, - "description": "Adds data to a stash", + "description": "Sets the content of a chunk of data inside a stash", "parameters": [ { "name": "stashID", "in": "path", "schema": { "type": "string", - "description": "ID of the stash to add data to, e.g., `20240215-job32`", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,255}$", + "description": "ID of the stash. The stash will be created if it doesn't already exist.", "example": "20240215-job32" }, "required": true @@ -1086,7 +1014,9 @@ "in": "path", "schema": { "type": "string", - "description": "Serial identifier of the chunk of data to add to the stash. Chunks will be assembled in the sort order of their serials, so utilize ordered identifiers for each chunk if a specific ordering of data is required, e.g., `1`, `2`, etc...\nIf the order of data is not important, random, but unique, values can be used, e.g., `c2a4567`." + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,255}$", + "description": "Serial identifier of the chunk of data to set in the stash. If a chunk has already been sent with the same serial, its data will be overwritten. 
Chunks will be assembled in the sort order of their serials, so utilize ordered identifiers for each chunk if a specific ordering of data in the stash is desired, e.g., `1`, `2`, etc...\nIf the order of data is not important, random, but unique, values can be used, e.g., `c2a4567`.", + "example": "1" }, "required": true } @@ -1170,14 +1100,15 @@ } } }, - "description": "Deletes a stash and its data", + "description": "Deletes a stash and all its data", "parameters": [ { "name": "stashID", "in": "path", "schema": { "type": "string", - "description": "ID of the stash to delete, e.g., `20240215-job32`", + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9_-]{0,255}$", + "description": "ID of the stash, e.g., `20240215-job32`", "example": "20240215-job32" }, "required": true From e2eece271e393265e09810b3aa4cdd5b926c34a1 Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Wed, 21 Aug 2024 22:09:55 +0100 Subject: [PATCH 2/9] Revert "Add note about stashID UUID format requirement" This reverts commit 6d10067e82686460dd49e2918008c196e84a0ab6. --- api-reference/v2/stashing/post-stashes-serial.mdx | 4 ---- api-reference/v2/tutorials/bulk-import.mdx | 4 ---- 2 files changed, 8 deletions(-) diff --git a/api-reference/v2/stashing/post-stashes-serial.mdx b/api-reference/v2/stashing/post-stashes-serial.mdx index f6b1839..a8f4095 100644 --- a/api-reference/v2/stashing/post-stashes-serial.mdx +++ b/api-reference/v2/stashing/post-stashes-serial.mdx @@ -8,7 +8,3 @@ When working with large datasets it is necessary to break it into smaller chunks To understand what stashing is and how to use it to work with large datasets, please see our [introduction to stashing](/api-reference/v2/stashing/introduction). - - - The stashID must be in the format of a UUID, e.g., `"123e4567-e89b-12d3-a456-426655440000"`. This is a known issue and will be fixed in a future release. 
- \ No newline at end of file diff --git a/api-reference/v2/tutorials/bulk-import.mdx b/api-reference/v2/tutorials/bulk-import.mdx index 8d1a8c8..4b5ce82 100644 --- a/api-reference/v2/tutorials/bulk-import.mdx +++ b/api-reference/v2/tutorials/bulk-import.mdx @@ -32,10 +32,6 @@ To simplify the coordination, parallelization, and idempotency of the upload pro For instance, a daily import process might have a stash ID of `20240501-import`. Or, an import specific to a single customer might have a stash ID of `customer-381-import`. - - The stashID must be in the format of a UUID, e.g., `"123e4567-e89b-12d3-a456-426655440000"`. This is a known issue and will be fixed in a future release. - - You are responsible for ensuring that the stash ID is unique and stable across associated uploads. ## Upload Data From 1d78008a93dbdff5f0583aedf65ec186ebe780a6 Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Wed, 21 Aug 2024 23:29:49 +0100 Subject: [PATCH 3/9] Update stashing docs --- api-reference/v2/stashing/introduction.mdx | 18 +++++++++--------- .../v2/stashing/post-stashes-serial.mdx | 10 ---------- .../v2/stashing/put-stashes-serial.mdx | 10 ++++++++++ 3 files changed, 19 insertions(+), 19 deletions(-) delete mode 100644 api-reference/v2/stashing/post-stashes-serial.mdx create mode 100644 api-reference/v2/stashing/put-stashes-serial.mdx diff --git a/api-reference/v2/stashing/introduction.mdx b/api-reference/v2/stashing/introduction.mdx index 0ee922b..ffecf0b 100644 --- a/api-reference/v2/stashing/introduction.mdx +++ b/api-reference/v2/stashing/introduction.mdx @@ -3,11 +3,11 @@ title: Introduction description: Stashing large datasets for use with the Glide API --- -When working with large datasets it is necessary to break it into smaller chunks for performance and reliability. We call this process "stashing". +When using large datasets with the Glide API, it may be necessary to break them into smaller chunks for performance and reliability. We call this process "stashing." 
## What is Stashing? -Stashing is the process by which a large dataset is broken into smaller subsets for uploading to Glide. Each subset is uploaded to Glide independently (either sequentially or in parallel) to form the complete dataset. +Stashing is the process by which a large dataset is broken into smaller chunks for uploading to Glide. Each chunk is uploaded to Glide independently (either sequentially or in parallel) to form the complete dataset. Once all data has been uploaded to the stash, the stash can then be referenced in other API calls to refer to the full dataset. This eliminates the need to include the entire dataset in the request itself, which may not be feasible due to its size. @@ -15,22 +15,22 @@ Once all data has been uploaded to the stash, the stash can then be referenced i You should use stashing when: -* You have a large dataset that you want to upload to Glide. Anything larger than 5mb should be broken up into smaller subsets and stashed. +* You have a large dataset that you want to upload to Glide. Anything larger than 5 MB should be broken up into smaller chunks and stashed. * You want to perform an atomic operation using a large dataset. For example, you may want to perform an import of data into an existing table but don't want users to see the intermediate state of the import or incremental updates while they're using their application. -## Core Concepts +## Stash IDs and Serials -The main components of a stash are its ID and the individual chunked data subsets which are identified by a serial. Both the id and serial are values you define. +The main components of a stash are its ID and the individual chunks of data, which are identified by serials. Both the ID and serial are values you define. -The stash ID is a unique identifier for the stash that you define from the relevant information of your domain. This is often a combination of temporal information and a domain identifier. 
For instance: `20240215-job32` or `2024-07-05T15:17:50Z-customer93ak`. +The **stash ID** is a unique identifier for the stash that you define. You might use information that's relevant to your domain, such as `20240215-job32` or `2024-07-05T15-17-50Z_customer93ak`, a UUID, or any other unique identifier. -Each subset of data that is uploaded to the stash is identified by a serial. If the order of each data subset is important to the overall datset, you should use the serial to represent the order of loading (e.g., `1`, `2`, etc...). +Each chunk of data that is uploaded to the stash is identified by a unique **serial**. The sort order of the serials indicates the order of the chunks in the overall dataset. If all serials can be parsed as integers, numerical sort order is used; otherwise, sorting is done lexicographically according to each character's Unicode code point value. If the order of data chunks in the stash is important, using integers as serials (e.g., `1`, `2`, etc...) is recommended. -If the order of each data subset is not important, then you can use a random serial for each subset like a UUID: `123e4567-e89b-12d3-a456-426655440000`. The only requirement is that the serial must be unique for each subset. +The maximum length for both stash IDs and serials is 256 characters. They may only contain letters, numbers, hyphens, and underscores, and must start with a letter or number. ## Referencing a Stash -Once a stash, and all its parts, has been uploaded you can use the stash ID in other API calls to refer to the full dataset instead of including the entire dataset itself. Think of it as a reference to all the data in the stash. +Once all chunks have been uploaded to a stash, you can use the stash ID in place of passing the full dataset inline to other Glide API calls. Think of it as a reference to all the data in the stash. 
For instance, instead of including all the row data in a request to create a table, you can instead reference the stash ID: diff --git a/api-reference/v2/stashing/post-stashes-serial.mdx b/api-reference/v2/stashing/post-stashes-serial.mdx deleted file mode 100644 index a8f4095..0000000 --- a/api-reference/v2/stashing/post-stashes-serial.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: Stash Data -openapi: post /stashes/{stashID}/{serial} ---- - -When working with large datasets it is necessary to break it into smaller chunks for performance and reliability. We call this process "stashing". - - - To understand what stashing is and how to use it to work with large datasets, please see our [introduction to stashing](/api-reference/v2/stashing/introduction). - diff --git a/api-reference/v2/stashing/put-stashes-serial.mdx b/api-reference/v2/stashing/put-stashes-serial.mdx new file mode 100644 index 0000000..e1f1077 --- /dev/null +++ b/api-reference/v2/stashing/put-stashes-serial.mdx @@ -0,0 +1,10 @@ +--- +title: Stash Data +openapi: put /stashes/{stashID}/{serial} +--- + +When using large datasets with the Glide API, it may be necessary to break them into smaller chunks for performance and reliability. We call this process "stashing." + + + To learn more about stashing and how to use it to work with large datasets, please see our [introduction to stashing](/api-reference/v2/stashing/introduction). 
+ From 42160796568d9e05822812ca812c8004506f0cb0 Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Thu, 22 Aug 2024 01:35:19 +0100 Subject: [PATCH 4/9] Empty commit to bump GH actions From ac3b9bc5031613cdfcfbf644ba17320d58a44459 Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Thu, 22 Aug 2024 01:38:27 +0100 Subject: [PATCH 5/9] Update mint.json --- mint.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mint.json b/mint.json index a4f245c..a0b445a 100644 --- a/mint.json +++ b/mint.json @@ -43,7 +43,7 @@ "group": "Stashing", "pages": [ "api-reference/v2/stashing/introduction", - "api-reference/v2/stashing/post-stashes-serial", + "api-reference/v2/stashing/put-stashes-serial", "api-reference/v2/stashing/delete-stash" ] }, From c12bab24a8af5dbb0a62ae763e26e101f9fc2f2b Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Thu, 22 Aug 2024 14:41:07 +0100 Subject: [PATCH 6/9] Update changelog --- api-reference/v2/resources/changelog.mdx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/api-reference/v2/resources/changelog.mdx b/api-reference/v2/resources/changelog.mdx index 25cee8c..351d8ff 100644 --- a/api-reference/v2/resources/changelog.mdx +++ b/api-reference/v2/resources/changelog.mdx @@ -2,6 +2,12 @@ title: Glide API Changelog sidebarTitle: Changelog --- +### August 22, 2024 + +- Users should now use the PUT method instead of POST for `/stashes/{stashID}/{serial}` to set the content of a chunk in a stash. +- Clarified that if a chunk is uploaded with an existing serial, its data will be overwritten. +- Documented the new format requirements for `stashID` and `serial`. 
+ ### August 2, 2024 - Add [delete stash](/api-reference/v2/stashing/delete-stash) endpoint documentation From c556c47db55a1df25322582f3f9c431af7447bd8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 22 Aug 2024 14:38:22 +0000 Subject: [PATCH 7/9] Update from https://github.com/glideapps/glide/commit/eab4000ec50143d718c02460ecb40040b555b332 --- openapi/swagger.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/openapi/swagger.json b/openapi/swagger.json index 1adb1dd..d1afa08 100644 --- a/openapi/swagger.json +++ b/openapi/swagger.json @@ -945,9 +945,6 @@ } }, "/stashes/{stashID}/{serial}": { - "post": { - "responses": {} - }, "put": { "responses": { "200": { From c67f4d5702e9ddfc681ac92d649793e725143f98 Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Thu, 22 Aug 2024 14:51:16 +0000 Subject: [PATCH 8/9] Clarify that stashes are deleted even if you do not call the delete endpoint --- api-reference/v2/stashing/delete-stash.mdx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/api-reference/v2/stashing/delete-stash.mdx b/api-reference/v2/stashing/delete-stash.mdx index 9202fae..2867d68 100644 --- a/api-reference/v2/stashing/delete-stash.mdx +++ b/api-reference/v2/stashing/delete-stash.mdx @@ -3,7 +3,9 @@ title: Delete Stash openapi: delete /stashes/{stashID} --- -If you no longer need a stash, you can delete it. This will remove the stash and all the data it contains. Stashes are automatically deleted within 48 hours of creation. +If you no longer need a stash, you can delete it. This will remove the stash and all the data it contains. + +Even if you do not call this endpoint, all stashes are automatically deleted within 48 hours after they are created. To understand what stashing is and how to use it to work with large datasets, please see our [introduction to stashing](/api-reference/v2/stashing/introduction). 
From 37b915bca5ac69c5e371ca8b90abea3ac9d4c04b Mon Sep 17 00:00:00 2001 From: Alex Corrado Date: Thu, 22 Aug 2024 16:02:25 +0100 Subject: [PATCH 9/9] Document invalid Stash ID or serial error --- api-reference/v2/general/errors.mdx | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/api-reference/v2/general/errors.mdx b/api-reference/v2/general/errors.mdx index 65638b8..00cc789 100644 --- a/api-reference/v2/general/errors.mdx +++ b/api-reference/v2/general/errors.mdx @@ -16,7 +16,7 @@ All error responses will follow the following format with a top-level `error` ob } ``` -## Invalid Auth Token +### Invalid Auth Token Using an auth token that does not exist or is incorrect will result in a `404` response status. @@ -33,4 +33,23 @@ curl --request GET \ "message": "API key not found, or duplicate IN****ID" } } -``` \ No newline at end of file +``` + +### Invalid Stash ID or Serial + +Using a stash ID or serial that does not [meet the requirements](../stashing/introduction#stash-ids-and-serials) will result in a `400` response status. + +```bash +curl --request PUT \ + --url https://api.glideapps.com/stashes/-INVALID-/1 \ + --header 'Authorization: Bearer VALID-API-KEY' +``` + +```json +{ + "error": { + "type": "request_validation_error", + "message": "Invalid request params: Stash ID must be 256 characters max, alphanumeric with dashes and underscores, no leading dash or underscore" + } +} + ``` \ No newline at end of file