From 86c7e2ab9aee8a92c84509194348f145a2f8862a Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Mon, 15 Apr 2024 14:58:28 +0800
Subject: [PATCH 1/5] add cost saving parameters

---
 run.go | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/run.go b/run.go
index 1f3cb7eb7..0ef31cd9a 100644
--- a/run.go
+++ b/run.go
@@ -28,6 +28,16 @@ type Run struct {
 Metadata map[string]any `json:"metadata"`
 Usage Usage `json:"usage,omitempty"`
 
+ Temperature *int `json:"temperature,omitempty"`
+ // The maximum number of prompt tokens that may be used over the course of the run.
+ // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'
+ MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+ // The maximum number of completion tokens that may be used over the course of the run.
+ // If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'
+ MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+ // ThreadTruncationStrategy defines the truncation strategy to use for the thread
+ TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
+
 httpHeader
 }
 
@@ -78,8 +88,41 @@ type RunRequest struct {
 AdditionalInstructions string `json:"additional_instructions,omitempty"`
 Tools []Tool `json:"tools,omitempty"`
 Metadata map[string]any `json:"metadata,omitempty"`
+
+ // What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+ Temperature *int `json:"temperature,omitempty"`
+
+ // The maximum number of prompt tokens that may be used over the course of the run.
+ // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'
+ MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
+
+ // The maximum number of completion tokens that may be used over the course of the run.
+ // If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'
+ MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
+
+ // ThreadTruncationStrategy defines the truncation strategy to use for the thread
+ TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
 }
 
+// ThreadTruncationStrategy defines the truncation strategy to use for the thread
+// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy
+type ThreadTruncationStrategy struct {
+ // default 'auto'
+ Type TruncationStrategy `json:"type,omitempty"`
+ // this field should be set if the truncation strategy is set to LastMessages
+ LastMessages *int `json:"last_messages,omitempty"`
+}
+
+// TruncationStrategy defines the truncation strategies available for thread management in an assistant
+type TruncationStrategy string
+
+const (
+ // TruncationStrategyAuto messages in the middle of the thread will be dropped to fit the context length of the model
+ TruncationStrategyAuto = TruncationStrategy("auto")
+ // TruncationStrategyLastMessages the thread will be truncated to the n most recent messages in the thread
+ TruncationStrategyLastMessages = TruncationStrategy("last_messages")
+)
+
 type RunModifyRequest struct {
 Metadata map[string]any `json:"metadata,omitempty"`
 }

From ad1009378d9cf04fead960c6afa034e986438c16 Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Mon, 15 Apr 2024 15:10:10 +0800
Subject: [PATCH 2/5] add periods at the end of comments

---
 run.go | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/run.go b/run.go
index 0ef31cd9a..1ea379ddf 100644
--- a/run.go
+++ b/run.go
@@ -30,12 +30,12 @@ type Run struct {
 Temperature *int `json:"temperature,omitempty"`
 // The maximum number of prompt tokens that may be used over the course of the run.
- // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'
+ // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
 MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
 // The maximum number of completion tokens that may be used over the course of the run.
- // If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'
+ // If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'.
 MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
- // ThreadTruncationStrategy defines the truncation strategy to use for the thread
+ // ThreadTruncationStrategy defines the truncation strategy to use for the thread.
 TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
 
 httpHeader
 }
@@ -93,33 +93,33 @@ type RunRequest struct {
 Temperature *int `json:"temperature,omitempty"`
 
 // The maximum number of prompt tokens that may be used over the course of the run.
- // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'
+ // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
 MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
 
 // The maximum number of completion tokens that may be used over the course of the run.
- // If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'
+ // If the run exceeds the number of completion tokens specified, the run will end with status 'incomplete'.
 MaxCompletionTokens int `json:"max_completion_tokens,omitempty"`
 
- // ThreadTruncationStrategy defines the truncation strategy to use for the thread
+ // ThreadTruncationStrategy defines the truncation strategy to use for the thread.
 TruncationStrategy *ThreadTruncationStrategy `json:"truncation_strategy,omitempty"`
 }
 
-// ThreadTruncationStrategy defines the truncation strategy to use for the thread
-// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy
+// ThreadTruncationStrategy defines the truncation strategy to use for the thread.
+// https://platform.openai.com/docs/assistants/how-it-works/truncation-strategy.
 type ThreadTruncationStrategy struct {
- // default 'auto'
+ // default 'auto'.
 Type TruncationStrategy `json:"type,omitempty"`
- // this field should be set if the truncation strategy is set to LastMessages
+ // this field should be set if the truncation strategy is set to LastMessages.
 LastMessages *int `json:"last_messages,omitempty"`
 }
 
-// TruncationStrategy defines the truncation strategies available for thread management in an assistant
+// TruncationStrategy defines the truncation strategies available for thread management in an assistant.
 type TruncationStrategy string
 
 const (
- // TruncationStrategyAuto messages in the middle of the thread will be dropped to fit the context length of the model
+ // TruncationStrategyAuto messages in the middle of the thread will be dropped to fit the context length of the model.
 TruncationStrategyAuto = TruncationStrategy("auto")
- // TruncationStrategyLastMessages the thread will be truncated to the n most recent messages in the thread
+ // TruncationStrategyLastMessages the thread will be truncated to the n most recent messages in the thread.
 TruncationStrategyLastMessages = TruncationStrategy("last_messages")
 )
 

From fc16a639690d8003ebeca5e3325c4f6bf87b7620 Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Mon, 15 Apr 2024 15:29:36 +0800
Subject: [PATCH 3/5] shorten comment

---
 run.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run.go b/run.go
index 1ea379ddf..6db063088 100644
--- a/run.go
+++ b/run.go
@@ -89,7 +89,7 @@ type RunRequest struct {
 Tools []Tool `json:"tools,omitempty"`
 Metadata map[string]any `json:"metadata,omitempty"`
 
- // What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+ // Sampling temperature between 0 and 2. Higher values like 0.8 are more random, lower values are more focused and deterministic.
 Temperature *int `json:"temperature,omitempty"`
 
 // The maximum number of prompt tokens that may be used over the course of the run.

From 77726f4111ca005428ec25b39a723b8e606ee2c1 Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Mon, 15 Apr 2024 15:41:48 +0800
Subject: [PATCH 4/5] further shorten comment

---
 run.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/run.go b/run.go
index 6db063088..6b596ed84 100644
--- a/run.go
+++ b/run.go
@@ -89,7 +89,8 @@ type RunRequest struct {
 Tools []Tool `json:"tools,omitempty"`
 Metadata map[string]any `json:"metadata,omitempty"`
 
- // Sampling temperature between 0 and 2. Higher values like 0.8 are more random, lower values are more focused and deterministic.
+ // Sampling temperature between 0 and 2. Higher values like 0.8 are more random.
+ // Lower values are more focused and deterministic.
 Temperature *int `json:"temperature,omitempty"`
 
 // The maximum number of prompt tokens that may be used over the course of the run.

From 6b5c472d962d0faaf840acd4f0886c0d4b2b7992 Mon Sep 17 00:00:00 2001
From: Quest Henkart
Date: Tue, 16 Apr 2024 16:52:33 +0800
Subject: [PATCH 5/5] fix type

---
 run.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/run.go b/run.go
index 6b596ed84..7c14779c5 100644
--- a/run.go
+++ b/run.go
@@ -28,7 +28,7 @@ type Run struct {
 Metadata map[string]any `json:"metadata"`
 Usage Usage `json:"usage,omitempty"`
 
- Temperature *int `json:"temperature,omitempty"`
+ Temperature *float32 `json:"temperature,omitempty"`
 // The maximum number of prompt tokens that may be used over the course of the run.
 // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
 MaxPromptTokens int `json:"max_prompt_tokens,omitempty"`
@@ -91,7 +91,7 @@ type RunRequest struct {
 
 // Sampling temperature between 0 and 2. Higher values like 0.8 are more random.
 // Lower values are more focused and deterministic.
- Temperature *int `json:"temperature,omitempty"`
+ Temperature *float32 `json:"temperature,omitempty"`
 
 // The maximum number of prompt tokens that may be used over the course of the run.
 // If the run exceeds the number of prompt tokens specified, the run will end with status 'incomplete'.
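
For illustration, a minimal sketch of how a caller might set the new cost-saving fields on a RunRequest once this series is applied. The thread and assistant IDs are placeholders, and the sketch assumes the exported go-openai client API (openai.NewClient, Client.CreateRun) that these patches extend; it is not part of the patches themselves.

package main

import (
	"context"
	"fmt"
	"log"

	openai "github.com/sashabaranov/go-openai"
)

func main() {
	client := openai.NewClient("your-api-key") // placeholder credential
	ctx := context.Background()

	temperature := float32(0.2) // low temperature: more focused, deterministic output
	lastMessages := 10          // keep only the 10 most recent messages in context

	run, err := client.CreateRun(ctx, "thread_abc123", openai.RunRequest{
		AssistantID: "asst_abc123",
		Temperature: &temperature,
		// Caps on token usage; per the OpenAI docs, exceeding either cap
		// ends the run with status 'incomplete'.
		MaxPromptTokens:     1000,
		MaxCompletionTokens: 500,
		// Truncate the thread to the n most recent messages instead of 'auto'.
		TruncationStrategy: &openai.ThreadTruncationStrategy{
			Type:         openai.TruncationStrategyLastMessages,
			LastMessages: &lastMessages,
		},
	})
	if err != nil {
		log.Fatalf("create run: %v", err)
	}
	fmt.Println("run created:", run.ID)
}

Temperature and LastMessages are pointer fields so that omitempty can distinguish "unset" from an explicit zero, which is why the sketch takes the addresses of local variables rather than passing literals.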