Commit ed56097

cpsievert and wch authored
Chat.messages() no longer trims messages by default (posit-dev#1657)
Co-authored-by: Winston Chang <winston@posit.co>
1 parent 8d6fb63 commit ed56097

File tree

14 files changed, +335 -131 lines changed


CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Other changes

 * A few changes for `ui.Chat()`, including:
+  * The `.messages()` method no longer trims messages by default (i.e., the default value of `token_limits` is now `None` instead of the overly generic and conservative value of `(4096, 1000)`). See the new generative AI in production templates (via `shiny create`) for examples of setting `token_limits` based on the model being used. (#1657)
   * User input that contains markdown now renders the expected HTML. (#1607)
   * Busy indication is now visible/apparent during the entire lifecycle of response generation. (#1607)
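
In practice, the change above means `chat.messages()` now returns the untrimmed conversation history unless `token_limits` is passed explicitly. A minimal sketch of opting back in to trimming, assuming an OpenAI-style model; the `(128000, 16000)` values are taken from the GPT-4o template below and are illustrative, not universal:

    # Inside an existing `@chat.on_user_submit` handler, assuming `chat = ui.Chat(id="chat", ...)`
    messages = chat.messages(
        format="openai",
        token_limits=(128000, 16000),  # roughly: (context window, tokens reserved for the response)
    )

The production templates added in this commit (reachable from the `shiny create` template menu) show the same pattern, with `token_limits` and a tokenizer chosen per model.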

shiny/_main_create.py

Lines changed: 13 additions & 5 deletions
@@ -231,6 +231,10 @@ def chat_hello_providers(self) -> list[ShinyTemplate]:
     def chat_enterprise(self) -> list[ShinyTemplate]:
         return self._templates("templates/chat/enterprise")

+    @property
+    def chat_production(self) -> list[ShinyTemplate]:
+        return self._templates("templates/chat/production")
+

 shiny_internal_templates = ShinyInternalTemplates()

@@ -260,6 +264,7 @@ def use_internal_template(
     chat_templates = [
         *shiny_internal_templates.chat_hello_providers,
         *shiny_internal_templates.chat_enterprise,
+        *shiny_internal_templates.chat_production,
     ]

     menu_choices = [

@@ -351,6 +356,7 @@ def use_internal_chat_ai_template(
         choices=[
             Choice(title="By provider...", value="_chat-ai_hello-providers"),
             Choice(title="Enterprise providers...", value="_chat-ai_enterprise"),
+            Choice(title="Production-ready chat AI", value="_chat-ai_production"),
             back_choice,
             cancel_choice,
         ],

@@ -369,11 +375,12 @@ def use_internal_chat_ai_template(
         )
         return

-    template_choices = (
-        shiny_internal_templates.chat_enterprise
-        if input == "_chat-ai_enterprise"
-        else shiny_internal_templates.chat_hello_providers
-    )
+    if input == "_chat-ai_production":
+        template_choices = shiny_internal_templates.chat_production
+    elif input == "_chat-ai_enterprise":
+        template_choices = shiny_internal_templates.chat_enterprise
+    else:
+        template_choices = shiny_internal_templates.chat_hello_providers

     choice = question_choose_template(template_choices, back_choice)

@@ -385,6 +392,7 @@ def use_internal_chat_ai_template(
         [
             *shiny_internal_templates.chat_hello_providers,
             *shiny_internal_templates.chat_enterprise,
+            *shiny_internal_templates.chat_production,
         ],
         choice,
     )
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
    "type": "app",
    "id": "chat-ai-anthropic-prod",
    "title": "Chat in production with Anthropic"
}
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
# ------------------------------------------------------------------------------------
# When putting a Chat into production, there are at least a couple of additional
# considerations to keep in mind:
# - Token limits: LLMs have (varying) limits on how many tokens can be included in
#   a single request and response. To accurately respect these limits, you'll want
#   to find the relevant limits and tokenizer for the model you're using, and inform
#   Chat about them.
# - Reproducibility: Consider pinning a snapshot of the LLM model to ensure that the
#   same model is used each time the app is run.
#
# See the MODEL_INFO dictionary below for an example of how to set these values for
# Anthropic's Claude model.
# https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table
# ------------------------------------------------------------------------------------
import os

from anthropic import AsyncAnthropic
from app_utils import load_dotenv

from shiny.express import ui

load_dotenv()
llm = AsyncAnthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))


MODEL_INFO = {
    "name": "claude-3-5-sonnet-20240620",
    # DISCLAIMER: Anthropic has not yet released a public tokenizer for Claude models,
    # so this uses the generic default provided by Chat() (for now). That is probably
    # ok, though, since the default tokenizer likely overestimates the token count.
    "tokenizer": None,
    "token_limits": (200000, 8192),
}


ui.page_opts(
    title="Hello Anthropic Chat",
    fillable=True,
    fillable_mobile=True,
)

chat = ui.Chat(
    id="chat",
    messages=[
        {"content": "Hello! How can I help you today?", "role": "assistant"},
    ],
    tokenizer=MODEL_INFO["tokenizer"],
)

chat.ui()


@chat.on_user_submit
async def _():
    messages = chat.messages(format="anthropic", token_limits=MODEL_INFO["token_limits"])
    # Anthropic's messages API requires max_tokens; cap it at the model's response limit.
    response = await llm.messages.create(
        model=MODEL_INFO["name"],
        messages=messages,
        stream=True,
        max_tokens=MODEL_INFO["token_limits"][1],
    )
    await chat.append_message_stream(response)
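
Since Anthropic doesn't publish an official tokenizer, the template above leaves `tokenizer` at Chat()'s generic default. If you'd rather supply one explicitly, here is a hedged sketch using the Hugging Face `tokenizers` package (already listed in this template's requirements); the pretrained model name is illustrative and only gives approximate counts for Claude:

    from tokenizers import Tokenizer

    # Any pretrained tokenizer works for rough token counting; it need not match Claude exactly.
    approx_tokenizer = Tokenizer.from_pretrained("bert-base-cased")

    chat = ui.Chat(
        id="chat",
        messages=[{"content": "Hello! How can I help you today?", "role": "assistant"}],
        tokenizer=approx_tokenizer,  # consulted when .messages() is called with token_limits
    )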
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
import os
from pathlib import Path
from typing import Any

app_dir = Path(__file__).parent
env_file = app_dir / ".env"


def load_dotenv(dotenv_path: os.PathLike[str] = env_file, **kwargs: Any) -> None:
    """
    A convenience wrapper around `dotenv.load_dotenv` that warns if `dotenv` is not installed.
    It also returns `None` to make it easier to ignore the return value.
    """
    try:
        import dotenv

        dotenv.load_dotenv(dotenv_path=dotenv_path, **kwargs)
    except ImportError:
        import warnings

        warnings.warn(
            "Could not import `dotenv`. If you want to use `.env` files to "
            "load environment variables, please install it using "
            "`pip install python-dotenv`.",
            stacklevel=2,
        )
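
For reference, the `load_dotenv` helper above looks for an optional `.env` file sitting next to the app. A minimal example for the Anthropic template, with a placeholder key value:

    # .env (keep this file out of version control)
    ANTHROPIC_API_KEY=sk-ant-...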
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
shiny
python-dotenv
tokenizers
anthropic
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
    "type": "app",
    "id": "chat-ai-openai-prod",
    "title": "Chat in production with OpenAI"
}
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
# ------------------------------------------------------------------------------------
# When putting a Chat into production, there are at least a couple of additional
# considerations to keep in mind:
# - Token limits: LLMs have (varying) limits on how many tokens can be included in
#   a single request and response. To accurately respect these limits, you'll want
#   to find the relevant limits and tokenizer for the model you're using, and inform
#   Chat about them.
# - Reproducibility: Consider pinning a snapshot of the LLM model to ensure that the
#   same model is used each time the app is run.
#
# See the MODEL_INFO dictionary below for an example of how to set these values for
# OpenAI's GPT-4o model.
# ------------------------------------------------------------------------------------
import os

import tiktoken
from app_utils import load_dotenv
from openai import AsyncOpenAI

from shiny.express import ui

load_dotenv()
llm = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


MODEL_INFO = {
    "name": "gpt-4o-2024-08-06",
    "tokenizer": tiktoken.encoding_for_model("gpt-4o-2024-08-06"),
    "token_limits": (128000, 16000),
}


ui.page_opts(
    title="Hello OpenAI Chat",
    fillable=True,
    fillable_mobile=True,
)

chat = ui.Chat(
    id="chat",
    messages=[
        {"content": "Hello! How can I help you today?", "role": "assistant"},
    ],
    tokenizer=MODEL_INFO["tokenizer"],
)

chat.ui()


@chat.on_user_submit
async def _():
    messages = chat.messages(format="openai", token_limits=MODEL_INFO["token_limits"])
    response = await llm.chat.completions.create(
        model=MODEL_INFO["name"], messages=messages, stream=True
    )
    await chat.append_message_stream(response)
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
import os
from pathlib import Path
from typing import Any

app_dir = Path(__file__).parent
env_file = app_dir / ".env"


def load_dotenv(dotenv_path: os.PathLike[str] = env_file, **kwargs: Any) -> None:
    """
    A convenience wrapper around `dotenv.load_dotenv` that warns if `dotenv` is not installed.
    It also returns `None` to make it easier to ignore the return value.
    """
    try:
        import dotenv

        dotenv.load_dotenv(dotenv_path=dotenv_path, **kwargs)
    except ImportError:
        import warnings

        warnings.warn(
            "Could not import `dotenv`. If you want to use `.env` files to "
            "load environment variables, please install it using "
            "`pip install python-dotenv`.",
            stacklevel=2,
        )
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
shiny
python-dotenv
tiktoken
openai
