From 4f65c2e43d3848b457b689ded89df0ddf0fabb8e Mon Sep 17 00:00:00 2001 From: Christopher Creutzig <89011131+ccreutzi@users.noreply.github.com> Date: Wed, 17 Jul 2024 15:27:15 +0200 Subject: [PATCH 1/6] Ignore some more stray files from NFS and Mac --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 8068a93..98963a6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ papers_to_read.csv data/* examples/data/* ._* +.nfs* +.DS_Store From 7c3268fd781075827df8c603bec00fef76622fe9 Mon Sep 17 00:00:00 2001 From: Christopher Creutzig <89011131+ccreutzi@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:13:05 +0200 Subject: [PATCH 2/6] Allow remote Ollama --- +llms/+internal/callOllamaChatAPI.m | 6 +++++- .github/workflows/ci.yml | 7 ++++++- doc/Ollama.md | 7 +++++++ ollamaChat.m | 10 +++++++--- tests/tollamaChat.m | 13 +++++++++++-- 5 files changed, 36 insertions(+), 7 deletions(-) diff --git a/+llms/+internal/callOllamaChatAPI.m b/+llms/+internal/callOllamaChatAPI.m index a7e6436..31a1616 100644 --- a/+llms/+internal/callOllamaChatAPI.m +++ b/+llms/+internal/callOllamaChatAPI.m @@ -37,9 +37,13 @@ nvp.Seed nvp.TimeOut nvp.StreamFun + nvp.Endpoint end -URL = "http://localhost:11434/api/chat"; +URL = nvp.Endpoint + "/api/chat"; +if ~startsWith(URL,"http") + URL = "http://" + URL; +end % The JSON for StopSequences must have an array, and cannot say "stop": "foo". % The easiest way to ensure that is to never pass in a scalar … diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e5f5348..70a7d6c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,19 +14,23 @@ jobs: run: | # Run the background, there is no way to daemonise at the moment ollama serve & + # Run a second server to test different endpoint + OLLAMA_HOST=127.0.0.1:11435 OLLAMA_MODELS=/tmp/ollama/models ollama serve & # A short pause is required before the HTTP port is opened sleep 5 # This endpoint blocks until ready time curl -i http://localhost:11434 + time curl -i http://localhost:11435 # For debugging, record Ollama version ollama --version - - name: Pull mistral model + - name: Pull models run: | ollama pull mistral + OLLAMA_HOST=127.0.0.1:11435 ollama pull qwen2:0.5b - name: Set up MATLAB uses: matlab-actions/setup-matlab@v2 with: @@ -39,6 +43,7 @@ jobs: AZURE_OPENAI_DEPLOYMENT: ${{ secrets.AZURE_DEPLOYMENT }} AZURE_OPENAI_ENDPOINT: ${{ secrets.AZURE_ENDPOINT }} AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_KEY }} + SECOND_OLLAMA_ENDPOINT: 127.0.0.1:11435 uses: matlab-actions/run-tests@v2 with: test-results-junit: test-results/results.xml diff --git a/doc/Ollama.md b/doc/Ollama.md index d6e0bc3..5a537e3 100644 --- a/doc/Ollama.md +++ b/doc/Ollama.md @@ -95,3 +95,10 @@ chat = ollamaChat("mistral", StreamFun=sf); txt = generate(chat,"What is Model-Based Design and how is it related to Digital Twin?"); % Should stream the response token by token ``` + +## Establishing a connection to remote LLMs using Ollama + +To connect to a remote Ollama server, use the `Endpoint` parameter. Include the server name and port number (Ollama starts on 11434 by default): +```matlab +chat = ollamaChat("mistral",Endpoint="ollamaServer:11434"); +``` diff --git a/ollamaChat.m b/ollamaChat.m index 2538038..001f18d 100644 --- a/ollamaChat.m +++ b/ollamaChat.m @@ -64,8 +64,9 @@ % Copyright 2024 The MathWorks, Inc. 
properties - Model (1,1) string - TopK (1,1) {mustBeReal,mustBePositive} = Inf + Model (1,1) string + Endpoint (1,1) string + TopK (1,1) {mustBeReal,mustBePositive} = Inf TailFreeSamplingZ (1,1) {mustBeReal} = 1 end @@ -82,6 +83,7 @@ nvp.TimeOut (1,1) {mustBeReal,mustBePositive} = 120 nvp.TailFreeSamplingZ (1,1) {mustBeReal} = 1 nvp.StreamFun (1,1) {mustBeA(nvp.StreamFun,'function_handle')} + nvp.Endpoint (1,1) string = "127.0.0.1:11434" end if isfield(nvp,"StreamFun") @@ -105,6 +107,7 @@ this.TailFreeSamplingZ = nvp.TailFreeSamplingZ; this.StopSequences = nvp.StopSequences; this.TimeOut = nvp.TimeOut; + this.Endpoint = nvp.Endpoint; end function [text, message, response] = generate(this, messages, nvp) @@ -147,7 +150,8 @@ TailFreeSamplingZ=this.TailFreeSamplingZ,... StopSequences=this.StopSequences, MaxNumTokens=nvp.MaxNumTokens, ... ResponseFormat=this.ResponseFormat,Seed=nvp.Seed, ... - TimeOut=this.TimeOut, StreamFun=this.StreamFun); + TimeOut=this.TimeOut, StreamFun=this.StreamFun, ... + Endpoint=this.Endpoint); if isfield(response.Body.Data,"error") err = response.Body.Data.error; diff --git a/tests/tollamaChat.m b/tests/tollamaChat.m index 9ae230b..094f13d 100644 --- a/tests/tollamaChat.m +++ b/tests/tollamaChat.m @@ -98,7 +98,6 @@ function seedFixesResult(testCase) testCase.verifyEqual(response1,response2); end - function streamFunc(testCase) function seen = sf(str) persistent data; @@ -118,6 +117,17 @@ function streamFunc(testCase) testCase.verifyGreaterThan(numel(sf("")), 1); end + function reactToEndpoint(testCase) + testCase.assumeTrue(isenv("SECOND_OLLAMA_ENDPOINT"),... + "Test point assumes a second Ollama server is running " + ... + "and $SECOND_OLLAMA_ENDPOINT points to it."); + chat = ollamaChat("qwen2:0.5b",Endpoint=getenv("SECOND_OLLAMA_ENDPOINT")); + testCase.verifyWarningFree(@() generate(chat,"dummy")); + % also make sure "http://" can be included + chat = ollamaChat("qwen2:0.5b",Endpoint="http://" + getenv("SECOND_OLLAMA_ENDPOINT")); + testCase.verifyWarningFree(@() generate(chat,"dummy")); + end + function doReturnErrors(testCase) testCase.assumeFalse( ... any(startsWith(ollamaChat.models,"abcdefghijklmnop")), ... @@ -126,7 +136,6 @@ function doReturnErrors(testCase) testCase.verifyError(@() generate(chat,"hi!"), "llms:apiReturnedError"); end - function invalidInputsConstructor(testCase, InvalidConstructorInput) testCase.verifyError(@() ollamaChat("mistral", InvalidConstructorInput.Input{:}), InvalidConstructorInput.Error); end From 1bc745196ad9c56949efb742ed02756eac92e48e Mon Sep 17 00:00:00 2001 From: Christopher Creutzig <89011131+ccreutzi@users.noreply.github.com> Date: Mon, 22 Jul 2024 14:55:17 +0200 Subject: [PATCH 3/6] Reworded as per review request --- doc/Ollama.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/Ollama.md b/doc/Ollama.md index 5a537e3..13460a1 100644 --- a/doc/Ollama.md +++ b/doc/Ollama.md @@ -98,7 +98,7 @@ txt = generate(chat,"What is Model-Based Design and how is it related to Digital ## Establishing a connection to remote LLMs using Ollama -To connect to a remote Ollama server, use the `Endpoint` parameter. Include the server name and port number (Ollama starts on 11434 by default): +To connect to a remote Ollama server, use the `Endpoint` name-value pair. Include the server name and port number. Ollama starts on 11434 by default. 
```matlab chat = ollamaChat("mistral",Endpoint="ollamaServer:11434"); ``` From ccc69e487ebec3123fcc795c5caec28a13256fb5 Mon Sep 17 00:00:00 2001 From: Christopher Creutzig <89011131+ccreutzi@users.noreply.github.com> Date: Tue, 23 Jul 2024 11:06:03 +0200 Subject: [PATCH 4/6] Remove claims from doc that Ollama needs to be local --- README.md | 4 ++-- doc/Ollama.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ec29399..463070f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![Open in MATLAB Online](https://www.mathworks.com/images/responsive/global/open-in-matlab-online.svg)](https://matlab.mathworks.com/open/github/v1?repo=matlab-deep-learning/llms-with-matlab) [![View Large Language Models (LLMs) with MATLAB on File Exchange](https://www.mathworks.com/matlabcentral/images/matlab-file-exchange.svg)](https://www.mathworks.com/matlabcentral/fileexchange/163796-large-language-models-llms-with-matlab) -This repository contains code to connect MATLAB to the [OpenAI™ Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) (which powers ChatGPT™), OpenAI Images API (which powers DALL·E™), [Azure® OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/), and local [Ollama®](https://ollama.com/) models. This allows you to leverage the natural language processing capabilities of large language models directly within your MATLAB environment. +This repository contains code to connect MATLAB to the [OpenAI™ Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) (which powers ChatGPT™), OpenAI Images API (which powers DALL·E™), [Azure® OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/), and [Ollama®](https://ollama.com/) models. This allows you to leverage the natural language processing capabilities of large language models directly within your MATLAB environment. ## Requirements @@ -15,7 +15,7 @@ This repository contains code to connect MATLAB to the [OpenAI™ Chat Completio - For OpenAI connections: An active OpenAI API subscription and API key. - For Azure OpenAI Services: An active Azure subscription with OpenAI access, deployment, and API key. -- For Ollama: A local Ollama installation. Currently, only connections on `localhost` are supported, i.e., Ollama and MATLAB must run on the same machine. +- For Ollama: An Ollama installation. ## Setup diff --git a/doc/Ollama.md b/doc/Ollama.md index 13460a1..6721e12 100644 --- a/doc/Ollama.md +++ b/doc/Ollama.md @@ -1,6 +1,6 @@ # Ollama -This repository contains code to connect MATLAB to a local [Ollama®](https://ollama.com) server, running large language models (LLMs). +This repository contains code to connect MATLAB to an [Ollama®](https://ollama.com) server, running large language models (LLMs). To use local models with Ollama, you will need to install and start an Ollama server, and “pull” models into it. Please follow the Ollama documentation for details. You should be familiar with the limitations and risks associated with using this technology, and you agree that you shall be solely responsible for full compliance with any terms that may apply to your use of any specific model. 
@@ -13,7 +13,7 @@ Some of the [LLMs currently supported out of the box on Ollama](https://ollama.c
 - gemma, codegemma
 - command-r
 
-## Establishing a connection to local LLMs using Ollama
+## Establishing a connection to LLMs using Ollama
 
 To create the chat assistant, call `ollamaChat` and specify the LLM you want to use:
 ```matlab

From 3bae1785771619e3ab7d1155970b194b9466337f Mon Sep 17 00:00:00 2001
From: Christopher Creutzig <89011131+ccreutzi@users.noreply.github.com>
Date: Tue, 23 Jul 2024 14:02:11 +0200
Subject: [PATCH 5/6] =?UTF-8?q?more=20=E2=80=9Cnormal=E2=80=9D=20test=20fo?=
 =?UTF-8?q?r=20=E2=80=9Cerror-free=E2=80=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/tollamaChat.m | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/tollamaChat.m b/tests/tollamaChat.m
index 094f13d..95b9c3f 100644
--- a/tests/tollamaChat.m
+++ b/tests/tollamaChat.m
@@ -125,7 +125,9 @@ function reactToEndpoint(testCase)
         testCase.verifyWarningFree(@() generate(chat,"dummy"));
         % also make sure "http://" can be included
         chat = ollamaChat("qwen2:0.5b",Endpoint="http://" + getenv("SECOND_OLLAMA_ENDPOINT"));
-        testCase.verifyWarningFree(@() generate(chat,"dummy"));
+        response = generate(chat,"some input");
+        testCase.verifyClass(response,'string');
+        testCase.verifyGreaterThan(strlength(response),0);
     end
 
     function doReturnErrors(testCase)

From 761a8438965436631ec463b036e2a72873ee379f Mon Sep 17 00:00:00 2001
From: Christopher Creutzig <89011131+ccreutzi@users.noreply.github.com>
Date: Tue, 23 Jul 2024 16:26:01 +0200
Subject: [PATCH 6/6] =?UTF-8?q?=E2=80=9Clocal=20and=20nonlocal=20Ollama=20?=
 =?UTF-8?q?models=E2=80=9D?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 463070f..96b2412 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 [![Open in MATLAB Online](https://www.mathworks.com/images/responsive/global/open-in-matlab-online.svg)](https://matlab.mathworks.com/open/github/v1?repo=matlab-deep-learning/llms-with-matlab)
 [![View Large Language Models (LLMs) with MATLAB on File Exchange](https://www.mathworks.com/matlabcentral/images/matlab-file-exchange.svg)](https://www.mathworks.com/matlabcentral/fileexchange/163796-large-language-models-llms-with-matlab)
 
-This repository contains code to connect MATLAB to the [OpenAI™ Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) (which powers ChatGPT™), OpenAI Images API (which powers DALL·E™), [Azure® OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/), and [Ollama®](https://ollama.com/) models. This allows you to leverage the natural language processing capabilities of large language models directly within your MATLAB environment.
+This repository contains code to connect MATLAB to the [OpenAI™ Chat Completions API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) (which powers ChatGPT™), OpenAI Images API (which powers DALL·E™), [Azure® OpenAI Service](https://learn.microsoft.com/en-us/azure/ai-services/openai/), and both local and nonlocal [Ollama®](https://ollama.com/) models. This allows you to leverage the natural language processing capabilities of large language models directly within your MATLAB environment.
 
 ## Requirements
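
A minimal usage sketch of the `Endpoint` name-value argument introduced in this series, consolidated in one place. `ollamaServer` is a placeholder hostname; the sketch assumes that host runs an Ollama server with the `mistral` model pulled:

```matlab
% Endpoint defaults to "127.0.0.1:11434"; set it to reach a remote server.
chat = ollamaChat("mistral", Endpoint="ollamaServer:11434");

% The scheme is optional: callOllamaChatAPI prepends "http://" whenever the
% endpoint does not already start with "http".
chat = ollamaChat("mistral", Endpoint="http://ollamaServer:11434");

% Generate text through the remote server.
txt = generate(chat, "What is Model-Based Design?");
disp(txt)
```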