Commit 04a5472

Use Gemini (#1)

* Updated install files
* Updated readme
* Updated main and rmq
* Updated model file name
* Updated folder name
* Updated model
* Updated requirements
* Use gemini
* Added model field
* Added system_prompt wrapper
* Update to support Submind participation (#2)
  * Resolve import error
  * Update dependencies for chatbotsforum compat.
  * Update license tests
  * Use stable embeddings
* Fix instruction loss of empty history

Co-authored-by: Daniel McKnight <34697904+NeonDaniel@users.noreply.github.com>
Co-authored-by: Daniel McKnight <daniel@neon.ai>
Co-authored-by: NeonBohdan <bohdan@neon.ai>

1 parent 38bb05e commit 04a5472

File tree: 10 files changed (+51, -39 lines)

.github/workflows/license_tests.yml
Lines changed: 1 addition & 1 deletion

@@ -9,4 +9,4 @@ jobs:
   license_tests:
     uses: neongeckocom/.github/.github/workflows/license_tests.yml@master
     with:
-      packages-exclude: '^(neon-llm-palm2|tqdm).*'
+      packages-exclude: '^(neon-llm|tqdm|klat-connector|neon-chatbot|dnspython).*'

Dockerfile
Lines changed: 2 additions & 2 deletions

@@ -1,7 +1,7 @@
 FROM python:3.9-slim
 
 LABEL vendor=neon.ai \
-    ai.neon.name="neon-llm-palm2"
+    ai.neon.name="neon-llm-gemini"
 
 ENV OVOS_CONFIG_BASE_FOLDER neon
 ENV OVOS_CONFIG_FILENAME diana.yaml

@@ -12,4 +12,4 @@ WORKDIR /app
 COPY . /app
 RUN pip install /app
 
-CMD [ "neon-llm-palm2" ]
+CMD [ "neon-llm-gemini" ]

README.md
Lines changed: 8 additions & 7 deletions

@@ -1,5 +1,5 @@
-# NeonAI LLM Palm2
-Proxies API calls to Google Palm2.
+# NeonAI LLM Gemini
+Proxies API calls to Google Gemini.
 
 ## Request Format
 API requests should include `history`, a list of tuples of strings, and the current

@@ -25,10 +25,11 @@ MQ:
   port: <MQ Port>
   server: <MQ Hostname or IP>
   users:
-    neon_llm_palm2:
-      password: <neon_palm2 user's password>
-      user: neon_palm2
-LLM_PALM2:
+    neon_llm_gemini:
+      password: <neon_gemini user's password>
+      user: neon_gemini
+LLM_GEMINI:
+  model: "gemini-pro"
   key_path: ""
   role: "You are trying to give a short answer in less than 40 words."
   context_depth: 3

@@ -39,6 +40,6 @@ LLM_PALM2:
 For example, if your configuration resides in `~/.config`:
 ```shell
 export CONFIG_PATH="/home/${USER}/.config"
-docker run -v ${CONFIG_PATH}:/config neon_llm_palm2
+docker run -v ${CONFIG_PATH}:/config neon_llm_gemini
 ```
 > Note: If connecting to a local MQ server, you may need to specify `--network host`
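
Taken together with the overlay change below, the configuration after this commit is shaped roughly like the following sketch (key names come from the diffs in this commit; the port, server, and password values are placeholders):

```yaml
MQ:
  port: 5672                 # placeholder
  server: mq.example.com     # placeholder
  users:
    neon_llm_gemini:
      user: neon_gemini
      password: supersecret  # placeholder
LLM_GEMINI:
  model: "gemini-pro"        # new key introduced by this commit
  key_path: ""
  role: "You are trying to give a short answer in less than 40 words."
  context_depth: 3
  max_tokens: 100
```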

docker_overlay/etc/neon/diana.yaml
Lines changed: 2 additions & 1 deletion

@@ -14,7 +14,8 @@ MQ:
   mq_handler:
     user: neon_api_utils
     password: Klatchat2021
-LLM_PALM2:
+LLM_GEMINI:
+  model: "gemini-pro"
 role: "You are trying to give a short answer in less than 40 words."
 context_depth: 3
 max_tokens: 100

File renamed without changes.

neon_llm_palm2/__main__.py renamed to neon_llm_gemini/__main__.py
Lines changed: 4 additions & 4 deletions

@@ -24,15 +24,15 @@
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-from neon_llm_palm2.rmq import Palm2MQ
+from neon_llm_gemini.rmq import GeminiMQ
 
 
 def main():
     # Run RabbitMQ
-    palm2MQ = Palm2MQ()
-    palm2MQ.run(run_sync=False, run_consumers=True,
+    geminiMQ = GeminiMQ()
+    geminiMQ.run(run_sync=False, run_consumers=True,
                 daemonize_consumers=True)
-    palm2MQ.observer_thread.join()
+    geminiMQ.observer_thread.join()
 
 
 if __name__ == "__main__":

neon_llm_palm2/palm2.py renamed to neon_llm_gemini/gemini.py
Lines changed: 23 additions & 13 deletions

@@ -25,25 +25,27 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
-from vertexai.language_models import ChatModel, ChatMessage, TextEmbeddingModel
+from vertexai.preview.generative_models import GenerativeModel, Content, Part
+from vertexai.language_models import TextEmbeddingModel
 from openai.embeddings_utils import distances_from_embeddings
 
 from typing import List, Dict
 from neon_llm_core.llm import NeonLLM
 
 
-class Palm2(NeonLLM):
+class Gemini(NeonLLM):
 
     mq_to_llm_role = {
         "user": "user",
-        "llm": "bot"
+        "llm": "model"
     }
 
     def __init__(self, config):
         super().__init__(config)
         self._embedding = None
         self._context_depth = 0
 
+        self.model_name = config["model"]
         self.role = config["role"]
         self.context_depth = config["context_depth"]
         self.max_tokens = config["max_tokens"]

@@ -67,9 +69,9 @@ def tokenizer_model_name(self) -> str:
         return ""
 
     @property
-    def model(self) -> ChatModel:
+    def model(self) -> GenerativeModel:
         if self._model is None:
-            self._model = ChatModel.from_pretrained("chat-bison")
+            self._model = GenerativeModel(self.model_name)
         return self._model
 
     @property

@@ -108,19 +110,20 @@ def get_sorted_answer_indexes(self, question: str, answers: List[str], persona:
 
     def _call_model(self, prompt: Dict) -> str:
         """
-            Wrapper for Palm2 Model generation logic
+            Wrapper for Gemini Model generation logic
             :param prompt: Input messages sequence
             :returns: Output text sequence generated by model
         """
 
         chat = self._model.start_chat(
-            context=prompt["system_prompt"],
-            message_history=prompt["chat_history"],
-            max_output_tokens=self.max_tokens,
-            temperature=0,
+            history=prompt["chat_history"],
         )
         response = chat.send_message(
             prompt["message"],
+            generation_config = {
+                "temperature": 0,
+                "max_output_tokens": self.max_tokens,
+            }
         )
         text = response.text
 

@@ -140,16 +143,23 @@ def _assemble_prompt(self, message: str, chat_history: List[List[str]], persona:
         # Context N messages
         messages = []
         for role, content in chat_history[-self.context_depth:]:
-            role_palm2 = self.convert_role(role)
-            messages.append(ChatMessage(content, role_palm2))
+            if ((len(messages) == 0) and (role == "user")):
+                content = self._convert2instruction(content, system_prompt)
+            role_gemini = self.convert_role(role)
+            messages.append(Content(parts=[Part.from_text(content)], role = role_gemini))
+        if (len(messages) == 0):
+            message = self._convert2instruction(message, system_prompt)
         prompt = {
-            "system_prompt": system_prompt,
             "chat_history": messages,
             "message": message
         }
 
         return prompt
 
+    def _convert2instruction(self, content: str, system_prompt: str):
+        instruction = f"{system_prompt.strip()}\n\n{content.strip()}"
+        return instruction
+
     def _score(self, prompt: str, targets: List[str], persona: dict) -> List[float]:
         """
             Calculates logarithmic probabilities for the list of provided text sequences
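
For orientation, here is a minimal, self-contained sketch of the Gemini chat flow that the new `_assemble_prompt` and `_call_model` implement. It uses only the same `vertexai.preview.generative_models` calls as the diff above; the project id, history, and prompts are illustrative, and `vertexai.init()` is assumed to run against valid GCP credentials:

```python
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Content, Part

# Assumes application-default credentials (or the configured key_path) exist.
vertexai.init(project="my-gcp-project")  # hypothetical project id

model = GenerativeModel("gemini-pro")

# History roles are "user"/"model" (not "bot" as with PaLM 2), hence the
# mq_to_llm_role change above. There is no separate system-prompt field in
# this API, so the system prompt is folded into the first user turn
# (what _convert2instruction does).
history = [
    Content(role="user", parts=[Part.from_text(
        "You are trying to give a short answer in less than 40 words.\n\n"
        "What is RabbitMQ?")]),
    Content(role="model", parts=[Part.from_text(
        "RabbitMQ is an open-source message broker.")]),
]

chat = model.start_chat(history=history)
response = chat.send_message(
    "How does it relate to MQTT?",
    generation_config={"temperature": 0, "max_output_tokens": 100},
)
print(response.text)
```

When the incoming history is empty, the code above falls back to folding the system prompt into the outgoing message itself, which is the "instruction loss of empty history" fix named in the commit message.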

neon_llm_palm2/rmq.py renamed to neon_llm_gemini/rmq.py
Lines changed: 5 additions & 5 deletions

@@ -25,12 +25,12 @@
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 from neon_llm_core.rmq import NeonLLMMQConnector
 
-from neon_llm_palm2.palm2 import Palm2
+from neon_llm_gemini.gemini import Gemini
 
 
-class Palm2MQ(NeonLLMMQConnector):
+class GeminiMQ(NeonLLMMQConnector):
     """
-        Module for processing MQ requests to Palm2
+        Module for processing MQ requests to Gemini
     """
 
     def __init__(self):

@@ -39,12 +39,12 @@ def __init__(self):
 
     @property
     def name(self):
-        return "palm2"
+        return "gemini"
 
     @property
     def model(self):
         if self._model is None:
-            self._model = Palm2(self.model_config)
+            self._model = Gemini(self.model_config)
         return self._model
 
     def warmup(self):

requirements/requirements.txt
Lines changed: 2 additions & 2 deletions

@@ -1,5 +1,5 @@
 # model
-google-cloud-aiplatform
+google-cloud-aiplatform>=1.38
 openai[embeddings]~=0.27
 # networking
-neon_llm_core~=0.1.0
+neon_llm_core[chatbots]~=0.1.0,>=0.1.1a1
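
The `>=1.38` floor on `google-cloud-aiplatform` appears to correspond to the release line that introduced the `vertexai.preview.generative_models` Gemini API imported in `gemini.py` above, and the new `[chatbots]` extra on `neon_llm_core` lines up with the Submind-participation support named in the commit message.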

setup.py
Lines changed: 4 additions & 4 deletions

@@ -67,12 +67,12 @@ def get_requirements(requirements_filename: str):
             version = line.split("'")[1]
 
 setup(
-    name='neon-llm-palm2',
+    name='neon-llm-gemini',
     version=version,
-    description='LLM service for Palm2',
+    description='LLM service for Gemini',
     long_description=long_description,
     long_description_content_type="text/markdown",
-    url='https://github.com/NeonGeckoCom/neon-llm-palm2',
+    url='https://github.com/NeonGeckoCom/neon-llm-gemini',
     author='Neongecko',
     author_email='developers@neon.ai',
     license='BSD-3.0',

@@ -85,7 +85,7 @@ def get_requirements(requirements_filename: str):
     ],
     entry_points={
         'console_scripts': [
-            'neon-llm-palm2=neon_llm_palm2.__main__:main'
+            'neon-llm-gemini=neon_llm_gemini.__main__:main'
         ]
     }
 )
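
With the renamed entry point, a local (non-Docker) run looks like the following sketch, mirroring the `RUN pip install /app` and `CMD [ "neon-llm-gemini" ]` lines in the Dockerfile above (assumes a valid `diana.yaml` at the configured path):

```shell
pip install .     # installs the neon-llm-gemini console script from setup.py
neon-llm-gemini   # equivalent to the Dockerfile CMD
```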
