
Commit b827ecf

Updated mistral and docs
1 parent 48e4c42 commit b827ecf

File tree

10 files changed: +725 -33 lines changed


README.md

Lines changed: 56 additions & 3 deletions
@@ -1,13 +1,14 @@
 # go-llm

 Large Language Model API interface. This is a simple API interface for large language models
-which run on [Ollama](https://github.com/ollama/ollama/blob/main/docs/api.md)
-and [Anthopic](https://docs.anthropic.com/en/api/getting-started).
+which run on [Ollama](https://github.com/ollama/ollama/blob/main/docs/api.md),
+[Anthropic](https://docs.anthropic.com/en/api/getting-started) and [Mistral](https://docs.mistral.ai/).

 The module includes the ability to utilize:

 * Maintaining a session of messages
 * Tool calling support
+* Creating embeddings from text
 * Streaming responses

 There is a command-line tool included in the module which can be used to interact with the API.
@@ -28,7 +29,12 @@ docker run \
 ## Programmatic Usage

 See the documentation [here](https://pkg.go.dev/github.com/mutablelogic/go-llm)
-for integration into your own Go programs. To create an
+for integration into your own Go programs.
+
+### Agent Instantiation
+
+For each LLM provider, you create an agent which can be used to interact with the API.
+To create an
 [Ollama](https://pkg.go.dev/github.com/mutablelogic/go-llm/pkg/ollama)
 agent,

@@ -66,6 +72,25 @@ func main() {
 }
 ```

+For Mistral models, you can use:
+
+```go
+import (
+	"os"
+
+	"github.com/mutablelogic/go-llm/pkg/mistral"
+)
+
+func main() {
+	// Create a new agent
+	agent, err := mistral.New(os.Getenv("MISTRAL_API_KEY"))
+	if err != nil {
+		panic(err)
+	}
+	// ...
+}
+```
+
+### Chat Sessions
+
 You create a **chat session** with a model as follows,

 ```go
@@ -90,6 +115,34 @@ func session(ctx context.Context, agent llm.Agent) error {
 }
 ```

+## Options
+
+You can add options to sessions, or to prompts. Different providers and models support
+different options.
+
+| Option | Ollama | Anthropic | Mistral | OpenAI | Description |
+|--------|--------|-----------|---------|--------|-------------|
+| `llm.WithTemperature(float64)` | Yes | Yes | Yes | - | What sampling temperature to use, between 0.0 and 1.0. Higher values like 0.7 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. |
+| `llm.WithTopP(float64)` | Yes | Yes | Yes | - | Nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. |
+| `llm.WithTopK(uint64)` | Yes | Yes | No | - | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. |
+| `llm.WithMaxTokens(uint64)` | - | Yes | Yes | - | The maximum number of tokens to generate in the response. |
+| `llm.WithStream(func(llm.ContextContent))` | Can be enabled when tools are not used | Yes | Yes | - | Stream the response to a function. |
+| `llm.WithToolChoice(string, string, ...)` | No | Yes | Use `auto`, `any`, `none`, `required` or a function name. Only the first argument is used. | - | The tool to use for the model. |
+| `llm.WithToolKit(llm.ToolKit)` | Cannot be combined with streaming | Yes | Yes | - | The set of tools to use. |
+| `llm.WithStopSequence(string, string, ...)` | Yes | Yes | Yes | - | Stop generation if one of these tokens is detected. |
+| `llm.WithSystemPrompt(string)` | No | Yes | Yes | - | Set the system prompt for the model. |
+| `llm.WithSeed(uint64)` | No | Yes | Yes | - | The seed to use for random sampling. If set, different calls will generate deterministic results. |
+| `llm.WithFormat(string)` | No | Yes | Use `json_format` or `text` | - | The format of the response. For Mistral, you must also instruct the model to produce JSON yourself with a system or a user message. |
+| `mistral.WithPresencePenalty(float64)` | - | - | Yes | - | Determines how much the model penalizes the repetition of words or phrases. A higher presence penalty encourages the model to use a wider variety of words and phrases, making the output more diverse and creative. |
+| `mistral.WithFrequencyPenalty(float64)` | - | - | Yes | - | Penalizes the repetition of words based on their frequency in the generated text. A higher frequency penalty discourages the model from repeating words that have already appeared frequently in the output, promoting diversity and reducing repetition. |
+| `mistral.WithPrediction(string)` | - | - | Yes | - | Enables you to specify expected results, optimizing response times by leveraging known or predictable content. This approach is especially effective for updating text documents or code files with minimal changes, reducing latency while maintaining high-quality results. |
+| `llm.WithSafePrompt()` | - | - | Yes | - | Whether to inject a safety prompt before all conversations. |
+| `llm.WithNumCompletions(uint64)` | - | - | Yes | - | Number of completions to return for each request. |
+| `llm.WithAttachment(io.Reader)` | Yes | Yes | Yes | - | Attach a file to a user prompt. It is the responsibility of the caller to close the reader. |
+| `anthropic.WithEphemeral()` | No | Yes | No | - | Attachments should be cached server-side. |
+| `anthropic.WithCitations()` | No | Yes | No | - | Attachments should be used in citations. |
+| `anthropic.WithUser(string)` | No | Yes | No | - | Indicate the user name for the request, for debugging. |
+
 ## Contributing & Distribution

 *This module is currently in development and subject to change*. Please do file
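
As a minimal sketch of how options from the table above might be passed to a provider call: the `llm.Context` chat session is assumed to have been created elsewhere, the helper name `complete` is hypothetical, and only the `ChatCompletion` signature and option names from this commit are relied upon.

```go
import (
	"context"
	"fmt"

	"github.com/mutablelogic/go-llm"
	"github.com/mutablelogic/go-llm/pkg/mistral"
)

// complete is a hypothetical helper: it assumes you already hold a
// *mistral.Client and an llm.Context chat session created elsewhere.
func complete(ctx context.Context, client *mistral.Client, session llm.Context) error {
	// Pass options from the README table directly to the completion call
	response, err := client.ChatCompletion(ctx, session,
		llm.WithTemperature(0.2),
		llm.WithMaxTokens(512),
		llm.WithSystemPrompt("Reply concisely."),
	)
	if err != nil {
		return err
	}
	// Response implements fmt.Stringer, so this prints the indented JSON
	fmt.Println(response)
	return nil
}
```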

opt.go

Lines changed: 60 additions & 1 deletion
@@ -216,17 +216,76 @@ func WithTopP(v float64) Opt {

 // Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more
 // diverse answers, while a lower value (e.g. 10) will be more conservative.
-func WithTopK(v uint) Opt {
+func WithTopK(v uint64) Opt {
 	return func(o *Opts) error {
 		o.Set("top_k", v)
 		return nil
 	}
 }

+// The maximum number of tokens to generate in the completion.
+func WithMaxTokens(v uint64) Opt {
+	return func(o *Opts) error {
+		o.Set("max_tokens", v)
+		return nil
+	}
+}
+
 // Set system prompt
 func WithSystemPrompt(v string) Opt {
 	return func(o *Opts) error {
 		o.system = v
 		return nil
 	}
 }
+
+// Set stop sequence
+func WithStopSequence(v ...string) Opt {
+	return func(o *Opts) error {
+		o.Set("stop", v)
+		return nil
+	}
+}
+
+// Set random seed for deterministic behavior
+func WithSeed(v uint64) Opt {
+	return func(o *Opts) error {
+		o.Set("seed", v)
+		return nil
+	}
+}
+
+// Set format
+func WithFormat(v any) Opt {
+	return func(o *Opts) error {
+		o.Set("format", v)
+		return nil
+	}
+}
+
+// Set tool choices: can be auto, none, required, any or a list of tool names
+func WithToolChoice(v ...string) Opt {
+	return func(o *Opts) error {
+		o.Set("tool_choice", v)
+		return nil
+	}
+}
+
+// Number of completions to return for each request
+func WithNumCompletions(v uint64) Opt {
+	return func(o *Opts) error {
+		if v < 1 || v > 8 {
+			return ErrBadParameter.With("num_completions must be between 1 and 8")
+		}
+		o.Set("num_completions", v)
+		return nil
+	}
+}
+
+// Inject a safety prompt before all conversations.
+func WithSafePrompt() Opt {
+	return func(o *Opts) error {
+		o.Set("safe_prompt", true)
+		return nil
+	}
+}
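
A minimal sketch of how these `Opt` functions compose from outside the package, using the same `llm.ApplyOpts` helper the Mistral client calls before building its request (the option values here are arbitrary):

```go
package main

import (
	"fmt"

	"github.com/mutablelogic/go-llm"
)

func main() {
	// Each With* function returns an Opt; ApplyOpts evaluates them in order
	// and returns an error if any option is invalid (for example,
	// WithNumCompletions rejects values outside 1..8).
	opts, err := llm.ApplyOpts(
		llm.WithTopK(50),
		llm.WithMaxTokens(1024),
		llm.WithStopSequence("\n\n"),
		llm.WithSeed(42),
		llm.WithNumCompletions(2),
	)
	if err != nil {
		panic(err)
	}
	fmt.Println(opts)
}
```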

pkg/anthropic/opt.go

Lines changed: 0 additions & 14 deletions
@@ -17,27 +17,13 @@ type optmetadata struct {
 ////////////////////////////////////////////////////////////////////////////////
 // OPTIONS

-func WithMaxTokens(v uint) llm.Opt {
-	return func(o *llm.Opts) error {
-		o.Set("max_tokens", v)
-		return nil
-	}
-}
-
 func WithUser(v string) llm.Opt {
 	return func(o *llm.Opts) error {
 		o.Set("user", v)
 		return nil
 	}
 }

-func WithStopSequences(v ...string) llm.Opt {
-	return func(o *llm.Opts) error {
-		o.Set("stop", v)
-		return nil
-	}
-}
-
 func WithEphemeral() llm.Opt {
 	return func(o *llm.Opts) error {
 		o.Set("ephemeral", true)

pkg/mistral/chat_completion.go

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
+package mistral
+
+import (
+	"context"
+	"encoding/json"
+
+	"github.com/mutablelogic/go-client"
+	"github.com/mutablelogic/go-llm"
+)
+
+///////////////////////////////////////////////////////////////////////////////
+// TYPES
+
+// Chat Completion Response
+type Response struct {
+	Id      string   `json:"id"`
+	Type    string   `json:"object"`
+	Created uint64   `json:"created"`
+	Model   string   `json:"model"`
+	Choices []Choice `json:"choices"`
+	Metrics `json:"usage,omitempty"`
+}
+
+// Response variation
+type Choice struct {
+	Index   uint64      `json:"index"`
+	Message MessageMeta `json:"message"`
+	Reason  string      `json:"finish_reason,omitempty"`
+}
+
+// Metrics
+type Metrics struct {
+	InputTokens  uint64 `json:"prompt_tokens,omitempty"`
+	OutputTokens uint   `json:"completion_tokens,omitempty"`
+	TotalTokens  uint   `json:"total_tokens,omitempty"`
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// STRINGIFY
+
+func (r Response) String() string {
+	data, err := json.MarshalIndent(r, "", "  ")
+	if err != nil {
+		return err.Error()
+	}
+	return string(data)
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// PUBLIC METHODS
+
+type reqChatCompletion struct {
+	Model            string         `json:"model"`
+	Temperature      float64        `json:"temperature,omitempty"`
+	TopP             float64        `json:"top_p,omitempty"`
+	MaxTokens        uint64         `json:"max_tokens,omitempty"`
+	Stream           bool           `json:"stream,omitempty"`
+	StopSequences    []string       `json:"stop,omitempty"`
+	Seed             uint64         `json:"random_seed,omitempty"`
+	Messages         []*MessageMeta `json:"messages"`
+	Format           any            `json:"response_format,omitempty"`
+	Tools            []llm.Tool     `json:"tools,omitempty"`
+	ToolChoice       any            `json:"tool_choice,omitempty"`
+	PresencePenalty  float64        `json:"presence_penalty,omitempty"`
+	FrequencyPenalty float64        `json:"frequency_penalty,omitempty"`
+	NumChoices       uint64         `json:"n,omitempty"`
+	Prediction       *Content       `json:"prediction,omitempty"`
+	SafePrompt       bool           `json:"safe_prompt,omitempty"`
+}
+
+func (mistral *Client) ChatCompletion(ctx context.Context, context llm.Context, opts ...llm.Opt) (*Response, error) {
+	// Apply options
+	opt, err := llm.ApplyOpts(opts...)
+	if err != nil {
+		return nil, err
+	}
+
+	// Append the system prompt at the beginning
+	seq := make([]*MessageMeta, 0, len(context.(*session).seq)+1)
+	if system := opt.SystemPrompt(); system != "" {
+		seq = append(seq, systemPrompt(system))
+	}
+	seq = append(seq, context.(*session).seq...)
+
+	// Request
+	req, err := client.NewJSONRequest(reqChatCompletion{
+		Model:            context.(*session).model.Name(),
+		Temperature:      optTemperature(opt),
+		TopP:             optTopP(opt),
+		MaxTokens:        optMaxTokens(opt),
+		Stream:           optStream(opt),
+		StopSequences:    optStopSequences(opt),
+		Seed:             optSeed(opt),
+		Messages:         seq,
+		Format:           optFormat(opt),
+		Tools:            optTools(mistral, opt),
+		ToolChoice:       optToolChoice(opt),
+		PresencePenalty:  optPresencePenalty(opt),
+		FrequencyPenalty: optFrequencyPenalty(opt),
+		NumChoices:       optNumCompletions(opt),
+		Prediction:       optPrediction(opt),
+		SafePrompt:       optSafePrompt(opt),
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	// Response
+	var response Response
+	if err := mistral.DoWithContext(ctx, req, &response, client.OptPath("chat", "completions")); err != nil {
+		return nil, err
+	}
+
+	// Return success
+	return &response, nil
+}
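
A short sketch of consuming the `Response` defined above; the helper name `printUsage` is hypothetical, and it relies only on the struct fields in this file (the embedded `Metrics` struct means the usage fields are promoted onto `Response`):

```go
import (
	"fmt"

	"github.com/mutablelogic/go-llm/pkg/mistral"
)

// printUsage is a hypothetical helper: given a *mistral.Response as returned
// by ChatCompletion above, report each choice and the token usage.
func printUsage(response *mistral.Response) {
	for _, choice := range response.Choices {
		fmt.Println(choice.Index, choice.Reason)
	}
	// Fields of the embedded Metrics struct (the "usage" object) are promoted
	fmt.Println("prompt tokens:", response.InputTokens)
	fmt.Println("total tokens:", response.TotalTokens)
}
```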
