Commit ed56097

cpsievert and wch authored
Chat.messages() no longer trims messages by default (posit-dev#1657)
Co-authored-by: Winston Chang <winston@posit.co>
1 parent 8d6fb63 commit ed56097

File tree

14 files changed, +335 -131 lines changed


CHANGELOG.md

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Other changes

 * A few changes for `ui.Chat()`, including:
+  * The `.messages()` method no longer trims messages by default (i.e., the default value of `token_limits` is now `None` instead of the overly generic and conservative value of `(4096, 1000)`). See the new generative AI in production templates (via `shiny create`) for examples of setting `token_limits` based on the model being used. (#1657)
   * User input that contains markdown now renders the expected HTML. (#1607)
   * Busy indication is now visible/apparent during the entire lifecycle of response generation. (#1607)
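
In practice, the change above means `chat.messages()` now returns the untrimmed conversation history unless `token_limits` is passed explicitly. A minimal sketch of opting back in to trimming, assuming an OpenAI-style model; the `(128000, 16000)` values are taken from the GPT-4o template below and are illustrative, not universal:

    # Inside an existing `@chat.on_user_submit` handler, assuming `chat = ui.Chat(id="chat", ...)`
    messages = chat.messages(
        format="openai",
        token_limits=(128000, 16000),  # roughly: (context window, tokens reserved for the response)
    )

The production templates added in this commit (reachable from the `shiny create` template menu) show the same pattern, with `token_limits` and a tokenizer chosen per model.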

shiny/_main_create.py

Lines changed: 13 additions & 5 deletions
@@ -231,6 +231,10 @@ def chat_hello_providers(self) -> list[ShinyTemplate]:
     def chat_enterprise(self) -> list[ShinyTemplate]:
         return self._templates("templates/chat/enterprise")

+    @property
+    def chat_production(self) -> list[ShinyTemplate]:
+        return self._templates("templates/chat/production")
+

 shiny_internal_templates = ShinyInternalTemplates()

@@ -260,6 +264,7 @@ def use_internal_template(
     chat_templates = [
         *shiny_internal_templates.chat_hello_providers,
         *shiny_internal_templates.chat_enterprise,
+        *shiny_internal_templates.chat_production,
     ]

     menu_choices = [

@@ -351,6 +356,7 @@ def use_internal_chat_ai_template(
         choices=[
             Choice(title="By provider...", value="_chat-ai_hello-providers"),
             Choice(title="Enterprise providers...", value="_chat-ai_enterprise"),
+            Choice(title="Production-ready chat AI", value="_chat-ai_production"),
             back_choice,
             cancel_choice,
         ],

@@ -369,11 +375,12 @@ def use_internal_chat_ai_template(
         )
         return

-    template_choices = (
-        shiny_internal_templates.chat_enterprise
-        if input == "_chat-ai_enterprise"
-        else shiny_internal_templates.chat_hello_providers
-    )
+    if input == "_chat-ai_production":
+        template_choices = shiny_internal_templates.chat_production
+    elif input == "_chat-ai_enterprise":
+        template_choices = shiny_internal_templates.chat_enterprise
+    else:
+        template_choices = shiny_internal_templates.chat_hello_providers

     choice = question_choose_template(template_choices, back_choice)

@@ -385,6 +392,7 @@ def use_internal_chat_ai_template(
         [
             *shiny_internal_templates.chat_hello_providers,
             *shiny_internal_templates.chat_enterprise,
+            *shiny_internal_templates.chat_production,
         ],
         choice,
     )
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
    "type": "app",
    "id": "chat-ai-anthropic-prod",
    "title": "Chat in production with Anthropic"
}
Lines changed: 59 additions & 0 deletions
@@ -0,0 +1,59 @@
# ------------------------------------------------------------------------------------
# When putting a Chat into production, there are at least a couple of additional
# considerations to keep in mind:
# - Token limits: LLMs have (varying) limits on how many tokens can be included in
#   a single request and response. To accurately respect these limits, you'll want
#   to find the relevant limits and tokenizer for the model you're using, and inform
#   Chat about them.
# - Reproducibility: Consider pinning a snapshot of the LLM model to ensure that the
#   same model is used each time the app is run.
#
# See the MODEL_INFO dictionary below for an example of how to set these values for
# Anthropic's Claude model.
# https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-table
# ------------------------------------------------------------------------------------
import os

from anthropic import AsyncAnthropic
from app_utils import load_dotenv

from shiny.express import ui

load_dotenv()
llm = AsyncAnthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))


MODEL_INFO = {
    "name": "claude-3-5-sonnet-20240620",
    # DISCLAIMER: Anthropic has not yet released a public tokenizer for Claude models,
    # so this uses the generic default provided by Chat() (for now). That is probably
    # ok, though, since the default tokenizer likely overestimates the token count.
    "tokenizer": None,
    "token_limits": (200000, 8192),
}


ui.page_opts(
    title="Hello Anthropic Chat",
    fillable=True,
    fillable_mobile=True,
)

chat = ui.Chat(
    id="chat",
    messages=[
        {"content": "Hello! How can I help you today?", "role": "assistant"},
    ],
    tokenizer=MODEL_INFO["tokenizer"],
)

chat.ui()


@chat.on_user_submit
async def _():
    messages = chat.messages(format="anthropic", token_limits=MODEL_INFO["token_limits"])
    # Anthropic's messages API requires max_tokens; cap it at the model's response limit.
    response = await llm.messages.create(
        model=MODEL_INFO["name"],
        messages=messages,
        stream=True,
        max_tokens=MODEL_INFO["token_limits"][1],
    )
    await chat.append_message_stream(response)
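
Since Anthropic doesn't publish an official tokenizer, the template above leaves `tokenizer` at Chat()'s generic default. If you'd rather supply one explicitly, here is a hedged sketch using the Hugging Face `tokenizers` package (already listed in this template's requirements); the pretrained model name is illustrative and only gives approximate counts for Claude:

    from tokenizers import Tokenizer

    # Any pretrained tokenizer works for rough token counting; it need not match Claude exactly.
    approx_tokenizer = Tokenizer.from_pretrained("bert-base-cased")

    chat = ui.Chat(
        id="chat",
        messages=[{"content": "Hello! How can I help you today?", "role": "assistant"}],
        tokenizer=approx_tokenizer,  # consulted when .messages() is called with token_limits
    )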
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
import os
from pathlib import Path
from typing import Any

app_dir = Path(__file__).parent
env_file = app_dir / ".env"


def load_dotenv(dotenv_path: os.PathLike[str] = env_file, **kwargs: Any) -> None:
    """
    A convenience wrapper around `dotenv.load_dotenv` that warns if `dotenv` is not installed.
    It also returns `None` to make it easier to ignore the return value.
    """
    try:
        import dotenv

        dotenv.load_dotenv(dotenv_path=dotenv_path, **kwargs)
    except ImportError:
        import warnings

        warnings.warn(
            "Could not import `dotenv`. If you want to use `.env` files to "
            "load environment variables, please install it using "
            "`pip install python-dotenv`.",
            stacklevel=2,
        )
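
For reference, the `load_dotenv` helper above looks for an optional `.env` file sitting next to the app. A minimal example for the Anthropic template, with a placeholder key value:

    # .env (keep this file out of version control)
    ANTHROPIC_API_KEY=sk-ant-...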
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
shiny
python-dotenv
tokenizers
anthropic
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
{
    "type": "app",
    "id": "chat-ai-openai-prod",
    "title": "Chat in production with OpenAI"
}
Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
# ------------------------------------------------------------------------------------
# When putting a Chat into production, there are at least a couple of additional
# considerations to keep in mind:
# - Token limits: LLMs have (varying) limits on how many tokens can be included in
#   a single request and response. To accurately respect these limits, you'll want
#   to find the relevant limits and tokenizer for the model you're using, and inform
#   Chat about them.
# - Reproducibility: Consider pinning a snapshot of the LLM model to ensure that the
#   same model is used each time the app is run.
#
# See the MODEL_INFO dictionary below for an example of how to set these values for
# OpenAI's GPT-4o model.
# ------------------------------------------------------------------------------------
import os

import tiktoken
from app_utils import load_dotenv
from openai import AsyncOpenAI

from shiny.express import ui

load_dotenv()
llm = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))


MODEL_INFO = {
    "name": "gpt-4o-2024-08-06",
    "tokenizer": tiktoken.encoding_for_model("gpt-4o-2024-08-06"),
    "token_limits": (128000, 16000),
}


ui.page_opts(
    title="Hello OpenAI Chat",
    fillable=True,
    fillable_mobile=True,
)

chat = ui.Chat(
    id="chat",
    messages=[
        {"content": "Hello! How can I help you today?", "role": "assistant"},
    ],
    tokenizer=MODEL_INFO["tokenizer"],
)

chat.ui()


@chat.on_user_submit
async def _():
    messages = chat.messages(format="openai", token_limits=MODEL_INFO["token_limits"])
    response = await llm.chat.completions.create(
        model=MODEL_INFO["name"], messages=messages, stream=True
    )
    await chat.append_message_stream(response)
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
import os
from pathlib import Path
from typing import Any

app_dir = Path(__file__).parent
env_file = app_dir / ".env"


def load_dotenv(dotenv_path: os.PathLike[str] = env_file, **kwargs: Any) -> None:
    """
    A convenience wrapper around `dotenv.load_dotenv` that warns if `dotenv` is not installed.
    It also returns `None` to make it easier to ignore the return value.
    """
    try:
        import dotenv

        dotenv.load_dotenv(dotenv_path=dotenv_path, **kwargs)
    except ImportError:
        import warnings

        warnings.warn(
            "Could not import `dotenv`. If you want to use `.env` files to "
            "load environment variables, please install it using "
            "`pip install python-dotenv`.",
            stacklevel=2,
        )
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
shiny
python-dotenv
tiktoken
openai
