diff --git a/bin/update-version.ts b/bin/update-version.ts index 9de0dc8..7635df0 100755 --- a/bin/update-version.ts +++ b/bin/update-version.ts @@ -5,7 +5,7 @@ import { execSync } from "node:child_process"; // NOTE: Merged with API version to produce the full SDK version string // https://docs.substrate.run/versioning -const SDK_VERSION = "1.0.5"; +const SDK_VERSION = "1.1.5"; const ok = (message: string) => console.log("\x1b[32m✓\x1b[0m", message); diff --git a/examples/kitchen-sink.ts b/examples/kitchen-sink.ts index a654309..2737b48 100755 --- a/examples/kitchen-sink.ts +++ b/examples/kitchen-sink.ts @@ -37,6 +37,7 @@ import { Mixtral8x7BInstruct, Llama3Instruct8B, Llama3Instruct70B, + SplitDocument, If, Box, } from "substrate"; @@ -52,6 +53,13 @@ const ALL_ENVS = [STAGING, PRODUCTION]; const VECTOR_STORE = "kitchen-sink"; const examples = [ + new SplitDocument({ + uri: "https://news.ycombinator.com/", + doc_id: "test", + metadata: { + test: "asd", + }, + }), new Box({ value: { a: 1, diff --git a/package-lock.json b/package-lock.json index 81ea0e8..7b16f64 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "substrate", - "version": "120240617.0.5", + "version": "120240617.1.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "substrate", - "version": "120240617.0.5", + "version": "120240617.1.5", "license": "MIT", "dependencies": { "@types/node-fetch": "^2.6.11", diff --git a/package.json b/package.json index 54ee110..63dfa72 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "substrate", - "version": "120240617.0.5", + "version": "120240617.1.5", "description": "The official SDK for the Substrate API", "repository": { "type": "git", diff --git a/src/GEN_VERSION b/src/GEN_VERSION index 08710fd..48f67c8 100644 --- a/src/GEN_VERSION +++ b/src/GEN_VERSION @@ -1 +1 @@ -20240617.20240621 \ No newline at end of file +20240617.20240718 \ No newline at end of file diff --git a/src/Nodes.ts b/src/Nodes.ts index e888936..a31ff49 100644 --- a/src/Nodes.ts +++ b/src/Nodes.ts @@ -1,7 +1,7 @@ /** * 𐃏 Substrate * @generated file - * 20240617.20240621 + * 20240617.20240718 */ import * as OpenAPI from "substrate/OpenAPI"; @@ -712,6 +712,18 @@ export class QueryVectorStoreOutResultsItem extends FutureArray { return super._result() as Promise; } } +export class SplitDocumentInMetadata extends FutureAnyObject {} +/** Document chunks */ +export class SplitDocumentOutItems extends FutureArray { + /** Returns `EmbedTextItem` at given index. */ + override at(index: number) { + return new EmbedTextItem(this._directive.next(index)); + } + /** Returns the result for `SplitDocumentOutItems` once it's node has been run. */ + protected override async _result(): Promise { + return super._result() as Promise; + } +} /** ErrorOut */ export class ErrorOut extends FutureObject { /** The type of error returned. */ @@ -722,6 +734,10 @@ export class ErrorOut extends FutureObject { get message() { return new FutureString(this._directive.next("message")); } + /** (Optional) The HTTP status code for the error. */ + get status_code() { + return new FutureNumber(this._directive.next("status_code")); + } /** returns the result for `ErrorOut` once it's node has been run. */ protected override async _result(): Promise { return super._result() as Promise; @@ -1891,6 +1907,10 @@ export class RemoveBackgroundIn extends FutureObject { get return_mask() { return new FutureBoolean(this._directive.next("return_mask")); } + /** (Optional) Invert the mask image. Only takes effect if `return_mask` is true. */ + get invert_mask() { + return new FutureBoolean(this._directive.next("invert_mask")); + } /** (Optional) Hex value background color. Transparent if unset. */ get background_color() { return new FutureString(this._directive.next("background_color")); @@ -2538,6 +2558,10 @@ export class FindOrCreateVectorStoreOut extends FutureObject { get model() { return new FutureString(this._directive.next("model")); } + /** (Optional) Number of leaves in the vector store. */ + get num_leaves() { + return new FutureNumber(this._directive.next("num_leaves")); + } /** returns the result for `FindOrCreateVectorStoreOut` once it's node has been run. */ protected override async _result(): Promise { return super._result() as Promise; @@ -2768,6 +2792,10 @@ export class QueryVectorStoreIn extends FutureObject { get ef_search() { return new FutureNumber(this._directive.next("ef_search")); } + /** (Optional) The number of leaves in the index tree to search. */ + get num_leaves_to_search() { + return new FutureNumber(this._directive.next("num_leaves_to_search")); + } /** (Optional) Include the values of the vectors in the response. */ get include_values() { return new FutureBoolean(this._directive.next("include_values")); @@ -2828,6 +2856,44 @@ export class QueryVectorStoreOut extends FutureObject { return super._result() as Promise; } } +/** SplitDocumentIn */ +export class SplitDocumentIn extends FutureObject { + /** URI of the document. */ + get uri() { + return new FutureString(this._directive.next("uri")); + } + /** (Optional) Document ID. */ + get doc_id() { + return new FutureString(this._directive.next("doc_id")); + } + /** (Optional) Document metadata. */ + get metadata() { + return new FutureAnyObject(this._directive.next("metadata")); + } + /** (Optional) Maximum number of units per chunk. Defaults to 1024 tokens for text or 40 lines for code. */ + get chunk_size() { + return new FutureNumber(this._directive.next("chunk_size")); + } + /** (Optional) Number of units to overlap between chunks. Defaults to 200 tokens for text or 15 lines for code. */ + get chunk_overlap() { + return new FutureNumber(this._directive.next("chunk_overlap")); + } + /** returns the result for `SplitDocumentIn` once it's node has been run. */ + protected override async _result(): Promise { + return super._result() as Promise; + } +} +/** SplitDocumentOut */ +export class SplitDocumentOut extends FutureObject { + /** Document chunks */ + get items() { + return new SplitDocumentOutItems(this._directive.next("items")); + } + /** returns the result for `SplitDocumentOut` once it's node has been run. */ + protected override async _result(): Promise { + return super._result() as Promise; + } +} export namespace Experimental { /** * Experimental Input @@ -4432,7 +4498,7 @@ export namespace RemoveBackground { */ export class RemoveBackground extends Node { /** - * Input arguments: `image_uri`, `return_mask` (optional), `background_color` (optional), `store` (optional) + * Input arguments: `image_uri`, `return_mask` (optional), `invert_mask` (optional), `background_color` (optional), `store` (optional) * * Output fields: `image_uri` * @@ -4750,6 +4816,73 @@ export class SegmentAnything extends Node { return super.output() as OpenAPI.components["schemas"]["SegmentAnythingOut"]; } } +export namespace SplitDocument { + /** + * SplitDocument Input + * https://www.substrate.run/nodes#SplitDocument + */ + export type Input = FutureExpandAny< + OpenAPI.components["schemas"]["SplitDocumentIn"] + >; + + /** + * SplitDocument Output + * https://www.substrate.run/nodes#SplitDocument + */ + export type Output = OpenAPI.components["schemas"]["SplitDocumentOut"]; +} + +/** + * Split document into text segments. + * + * https://www.substrate.run/nodes#SplitDocument + */ +export class SplitDocument extends Node { + /** + * Input arguments: `uri`, `doc_id` (optional), `metadata` (optional), `chunk_size` (optional), `chunk_overlap` (optional) + * + * Output fields: `items` + * + * https://www.substrate.run/nodes#SplitDocument + */ + constructor( + args: FutureExpandAny, + options?: Options, + ) { + super(args, options); + this.node = "SplitDocument"; + } + + /** + * Retrieve this node's output from a response. + * + * Output fields: `items` + * + * https://www.substrate.run/nodes#SplitDocument + */ + protected override async result(): Promise< + OpenAPI.components["schemas"]["SplitDocumentOut"] | undefined + > { + return Promise.resolve( + this._response ? this._response.get(this) : undefined, + ) as Promise; + } + + /** + * Future reference to this node's output. + * + * Output fields: `items` + * + * https://www.substrate.run/nodes#SplitDocument + */ + override get future(): SplitDocumentOut { + return new SplitDocumentOut(new Trace([], this)); + } + + protected override output(): OpenAPI.components["schemas"]["SplitDocumentOut"] { + return super.output() as OpenAPI.components["schemas"]["SplitDocumentOut"]; + } +} export namespace EmbedText { /** * EmbedText Input @@ -5180,7 +5313,7 @@ export class FindOrCreateVectorStore extends Node { /** * Input arguments: `collection_name`, `model` * - * Output fields: `collection_name`, `model` + * Output fields: `collection_name`, `model`, `num_leaves` (optional) * * https://www.substrate.run/nodes#FindOrCreateVectorStore */ @@ -5197,7 +5330,7 @@ export class FindOrCreateVectorStore extends Node { /** * Retrieve this node's output from a response. * - * Output fields: `collection_name`, `model` + * Output fields: `collection_name`, `model`, `num_leaves` (optional) * * https://www.substrate.run/nodes#FindOrCreateVectorStore */ @@ -5214,7 +5347,7 @@ export class FindOrCreateVectorStore extends Node { /** * Future reference to this node's output. * - * Output fields: `collection_name`, `model` + * Output fields: `collection_name`, `model`, `num_leaves` (optional) * * https://www.substrate.run/nodes#FindOrCreateVectorStore */ @@ -5387,7 +5520,7 @@ export namespace QueryVectorStore { */ export class QueryVectorStore extends Node { /** - * Input arguments: `collection_name`, `model`, `query_strings` (optional), `query_image_uris` (optional), `query_vectors` (optional), `query_ids` (optional), `top_k` (optional), `ef_search` (optional), `include_values` (optional), `include_metadata` (optional), `filters` (optional) + * Input arguments: `collection_name`, `model`, `query_strings` (optional), `query_image_uris` (optional), `query_vectors` (optional), `query_ids` (optional), `top_k` (optional), `ef_search` (optional), `num_leaves_to_search` (optional), `include_values` (optional), `include_metadata` (optional), `filters` (optional) * * Output fields: `results`, `collection_name` (optional), `model` (optional) * @@ -5663,6 +5796,7 @@ export type AnyNode = | UpscaleImage | SegmentUnderPoint | SegmentAnything + | SplitDocument | EmbedText | MultiEmbedText | EmbedImage @@ -5733,30 +5867,32 @@ export type NodeOutput = T extends Experimental ? OpenAPI.components["schemas"]["SegmentUnderPointOut"] : T extends SegmentAnything ? OpenAPI.components["schemas"]["SegmentAnythingOut"] - : T extends EmbedText - ? OpenAPI.components["schemas"]["EmbedTextOut"] - : T extends MultiEmbedText - ? OpenAPI.components["schemas"]["MultiEmbedTextOut"] - : T extends EmbedImage - ? OpenAPI.components["schemas"]["EmbedImageOut"] - : T extends MultiEmbedImage - ? OpenAPI.components["schemas"]["MultiEmbedImageOut"] - : T extends JinaV2 - ? OpenAPI.components["schemas"]["JinaV2Out"] - : T extends CLIP - ? OpenAPI.components["schemas"]["CLIPOut"] - : T extends FindOrCreateVectorStore - ? OpenAPI.components["schemas"]["FindOrCreateVectorStoreOut"] - : T extends ListVectorStores - ? OpenAPI.components["schemas"]["ListVectorStoresOut"] - : T extends DeleteVectorStore - ? OpenAPI.components["schemas"]["DeleteVectorStoreOut"] - : T extends QueryVectorStore - ? OpenAPI.components["schemas"]["QueryVectorStoreOut"] - : T extends FetchVectors - ? OpenAPI.components["schemas"]["FetchVectorsOut"] - : T extends UpdateVectors - ? OpenAPI.components["schemas"]["UpdateVectorsOut"] - : T extends DeleteVectors - ? OpenAPI.components["schemas"]["DeleteVectorsOut"] - : never; + : T extends SplitDocument + ? OpenAPI.components["schemas"]["SplitDocumentOut"] + : T extends EmbedText + ? OpenAPI.components["schemas"]["EmbedTextOut"] + : T extends MultiEmbedText + ? OpenAPI.components["schemas"]["MultiEmbedTextOut"] + : T extends EmbedImage + ? OpenAPI.components["schemas"]["EmbedImageOut"] + : T extends MultiEmbedImage + ? OpenAPI.components["schemas"]["MultiEmbedImageOut"] + : T extends JinaV2 + ? OpenAPI.components["schemas"]["JinaV2Out"] + : T extends CLIP + ? OpenAPI.components["schemas"]["CLIPOut"] + : T extends FindOrCreateVectorStore + ? OpenAPI.components["schemas"]["FindOrCreateVectorStoreOut"] + : T extends ListVectorStores + ? OpenAPI.components["schemas"]["ListVectorStoresOut"] + : T extends DeleteVectorStore + ? OpenAPI.components["schemas"]["DeleteVectorStoreOut"] + : T extends QueryVectorStore + ? OpenAPI.components["schemas"]["QueryVectorStoreOut"] + : T extends FetchVectors + ? OpenAPI.components["schemas"]["FetchVectorsOut"] + : T extends UpdateVectors + ? OpenAPI.components["schemas"]["UpdateVectorsOut"] + : T extends DeleteVectors + ? OpenAPI.components["schemas"]["DeleteVectorsOut"] + : never; diff --git a/src/OpenAPI.ts b/src/OpenAPI.ts index 1bf2b07..c676857 100644 --- a/src/OpenAPI.ts +++ b/src/OpenAPI.ts @@ -207,6 +207,13 @@ export interface paths { */ post: operations["SegmentAnything"]; }; + "/SplitDocument": { + /** + * SplitDocument + * @description Split document into text segments. + */ + post: operations["SplitDocument"]; + }; "/EmbedText": { /** * EmbedText @@ -313,6 +320,11 @@ export interface components { type: "api_error" | "invalid_request_error" | "dependency_error"; /** @description A message providing more details about the error. */ message: string; + /** + * @description The HTTP status code for the error. + * @default 500 + */ + status_code?: number; }; /** ExperimentalIn */ ExperimentalIn: { @@ -1222,6 +1234,11 @@ export interface components { * @default false */ return_mask?: boolean; + /** + * @description Invert the mask image. Only takes effect if `return_mask` is true. + * @default false + */ + invert_mask?: boolean; /** @description Hex value background color. Transparent if unset. */ background_color?: string; /** @description Use "hosted" to return an image URL hosted on Substrate. You can also provide a URL to a registered [file store](https://guides.substrate.run/guides/external-file-storage). If unset, the image data will be returned as a base64-encoded string. */ @@ -1758,6 +1775,8 @@ export interface components { * @enum {string} */ model: "jina-v2" | "clip"; + /** @description Number of leaves in the vector store. */ + num_leaves?: number; }; /** ListVectorStoresIn */ ListVectorStoresIn: Record; @@ -1772,6 +1791,8 @@ export interface components { * @enum {string} */ model: "jina-v2" | "clip"; + /** @description Number of leaves in the vector store. */ + num_leaves?: number; }[]; }; /** DeleteVectorStoreIn */ @@ -1915,6 +1936,11 @@ export interface components { * @default 40 */ ef_search?: number; + /** + * @description The number of leaves in the index tree to search. + * @default 40 + */ + num_leaves_to_search?: number; /** * @description Include the values of the vectors in the response. * @default false @@ -1972,6 +1998,35 @@ export interface components { */ model?: "jina-v2" | "clip"; }; + /** SplitDocumentIn */ + SplitDocumentIn: { + /** @description URI of the document. */ + uri: string; + /** @description Document ID. */ + doc_id?: string; + /** @description Document metadata. */ + metadata?: { + [key: string]: unknown; + }; + /** @description Maximum number of units per chunk. Defaults to 1024 tokens for text or 40 lines for code. */ + chunk_size?: number; + /** @description Number of units to overlap between chunks. Defaults to 200 tokens for text or 15 lines for code. */ + chunk_overlap?: number; + }; + /** SplitDocumentOut */ + SplitDocumentOut: { + /** @description Document chunks */ + items: { + /** @description Text to embed. */ + text: string; + /** @description Metadata that can be used to query the vector store. Ignored if `collection_name` is unset. */ + metadata?: { + [key: string]: unknown; + }; + /** @description Vector store document ID. Ignored if `collection_name` is unset. */ + doc_id?: string; + }[]; + }; }; responses: never; parameters: never; @@ -3429,6 +3484,11 @@ export interface operations { * @default false */ return_mask?: boolean; + /** + * @description Invert the mask image. Only takes effect if `return_mask` is true. + * @default false + */ + invert_mask?: boolean; /** @description Hex value background color. Transparent if unset. */ background_color?: string; /** @description Use "hosted" to return an image URL hosted on Substrate. You can also provide a URL to a registered [file store](https://guides.substrate.run/guides/external-file-storage). If unset, the image data will be returned as a base64-encoded string. */ @@ -3638,6 +3698,59 @@ export interface operations { }; }; }; + /** + * SplitDocument + * @description Split document into text segments. + */ + SplitDocument: { + requestBody?: { + content: { + /** + * @example { + * "doc_id": "example_pdf", + * "uri": "https://arxiv.org/pdf/2405.07945", + * "metadata": { + * "title": "GRASS II: Simulations of Potential Granulation Noise Mitigation Methods" + * } + * } + */ + "application/json": { + /** @description URI of the document. */ + uri: string; + /** @description Document ID. */ + doc_id?: string; + /** @description Document metadata. */ + metadata?: { + [key: string]: unknown; + }; + /** @description Maximum number of units per chunk. Defaults to 1024 tokens for text or 40 lines for code. */ + chunk_size?: number; + /** @description Number of units to overlap between chunks. Defaults to 200 tokens for text or 15 lines for code. */ + chunk_overlap?: number; + }; + }; + }; + responses: { + /** @description OK */ + 200: { + content: { + "application/json": { + /** @description Document chunks */ + items: { + /** @description Text to embed. */ + text: string; + /** @description Metadata that can be used to query the vector store. Ignored if `collection_name` is unset. */ + metadata?: { + [key: string]: unknown; + }; + /** @description Vector store document ID. Ignored if `collection_name` is unset. */ + doc_id?: string; + }[]; + }; + }; + }; + }; + }; /** * EmbedText * @description Generate embedding for a text document. @@ -4054,6 +4167,8 @@ export interface operations { * @enum {string} */ model: "jina-v2" | "clip"; + /** @description Number of leaves in the vector store. */ + num_leaves?: number; }; }; }; @@ -4084,6 +4199,8 @@ export interface operations { * @enum {string} */ model: "jina-v2" | "clip"; + /** @description Number of leaves in the vector store. */ + num_leaves?: number; }[]; }; }; @@ -4176,6 +4293,11 @@ export interface operations { * @default 40 */ ef_search?: number; + /** + * @description The number of leaves in the index tree to search. + * @default 40 + */ + num_leaves_to_search?: number; /** * @description Include the values of the vectors in the response. * @default false diff --git a/src/index.ts b/src/index.ts index 1c4d49d..b386b58 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,7 +1,7 @@ /** * 𐃏 Substrate TypeScript SDK * @generated file - * 20240617.20240621 + * 20240617.20240718 */ export { SubstrateError } from "substrate/Error"; @@ -34,6 +34,7 @@ export { UpscaleImage, SegmentUnderPoint, SegmentAnything, + SplitDocument, EmbedText, MultiEmbedText, EmbedImage, diff --git a/src/openapi.json b/src/openapi.json index 5e4f29b..c352d32 100644 --- a/src/openapi.json +++ b/src/openapi.json @@ -28,6 +28,11 @@ "message": { "type": "string", "description": "A message providing more details about the error." + }, + "status_code": { + "type": "integer", + "default": 500, + "description": "The HTTP status code for the error." } }, "required": ["type", "message"] @@ -97,7 +102,7 @@ "condition": { "type": "boolean", "description": "Condition.", - "x-loggable": true + "x-loggable": false }, "value_if_true": { "description": "Result when condition is true.", @@ -1614,6 +1619,11 @@ "description": "Return a mask image instead of the original content.", "default": false }, + "invert_mask": { + "type": "boolean", + "description": "Invert the mask image. Only takes effect if `return_mask` is true.", + "default": false + }, "background_color": { "type": "string", "description": "Hex value background color. Transparent if unset.", @@ -2349,6 +2359,11 @@ "type": "string", "description": "Selected embedding model.", "enum": ["jina-v2", "clip"] + }, + "num_leaves": { + "type": "integer", + "description": "Number of leaves in the vector store.", + "minimum": 1 } }, "required": ["collection_name", "model"] @@ -2626,6 +2641,14 @@ "description": "The size of the dynamic candidate list for searching the index graph.", "x-loggable": true }, + "num_leaves_to_search": { + "type": "integer", + "minimum": 1, + "maximum": 1000, + "default": 40, + "description": "The number of leaves in the index tree to search.", + "x-loggable": true + }, "include_values": { "type": "boolean", "default": false, @@ -2702,6 +2725,50 @@ } }, "required": ["results"] + }, + "SplitDocumentIn": { + "title": "SplitDocumentIn", + "type": "object", + "properties": { + "uri": { + "type": "string", + "description": "URI of the document." + }, + "doc_id": { + "type": "string", + "description": "Document ID." + }, + "metadata": { + "type": "object", + "description": "Document metadata.", + "additionalProperties": true + }, + "chunk_size": { + "type": "integer", + "minimum": 1, + "description": "Maximum number of units per chunk. Defaults to 1024 tokens for text or 40 lines for code." + }, + "chunk_overlap": { + "type": "integer", + "minimum": 0, + "description": "Number of units to overlap between chunks. Defaults to 200 tokens for text or 15 lines for code." + } + }, + "required": ["uri"] + }, + "SplitDocumentOut": { + "title": "SplitDocumentOut", + "type": "object", + "properties": { + "items": { + "type": "array", + "description": "Document chunks", + "items": { + "$ref": "#/components/schemas/EmbedTextItem" + } + } + }, + "required": ["items"] } } }, @@ -2720,7 +2787,9 @@ }, "example": { "name": "some_name", - "args": { "foo": "bar" } + "args": { + "foo": "bar" + } } } } @@ -2734,7 +2803,9 @@ "$ref": "#/components/schemas/ExperimentalOut" }, "example": { - "output": { "foo": "bar" } + "output": { + "foo": "bar" + } } } } @@ -3984,6 +4055,62 @@ } } }, + "/SplitDocument": { + "post": { + "summary": "SplitDocument", + "operationId": "SplitDocument", + "tags": ["category:embedding"], + "description": "Split document into text segments.", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SplitDocumentIn" + }, + "example": { + "doc_id": "example_pdf", + "uri": "https://arxiv.org/pdf/2405.07945", + "metadata": { + "title": "GRASS II: Simulations of Potential Granulation Noise Mitigation Methods" + } + } + } + } + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SplitDocumentOut" + }, + "example": { + "items": [ + { + "text": "This is the first chunk of the pdf", + "metadata": { + "title": "GRASS II: Simulations of Potential Granulation Noise Mitigation Methods", + "chunk_id": "chk_asd897asdhnad0j8qd8qnd98" + }, + "doc_id": "example_pdf" + }, + { + "text": "This is the second chunk of the pdf", + "metadata": { + "title": "GRASS II: Simulations of Potential Granulation Noise Mitigation Methods", + "chunk_id": "chk_nvsiusd89adsy89dahd9abs8" + }, + "doc_id": "example_pdf" + } + ] + } + } + } + } + } + } + }, "/EmbedText": { "post": { "summary": "EmbedText", diff --git a/src/version.ts b/src/version.ts index 1e7027c..2a42b82 100644 --- a/src/version.ts +++ b/src/version.ts @@ -1 +1 @@ -export const VERSION = "120240617.0.5"; +export const VERSION = "120240617.1.5";