Skip to content

Commit 288cc54

Browse files
committed
feat(instrumentation): add get attribute helpers for remaining semantic conventions
1 parent 60ee275 commit 288cc54

File tree

4 files changed

+218
-7
lines changed

4 files changed

+218
-7
lines changed

python/openinference-instrumentation/src/openinference/instrumentation/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
ImageMessageContent,
2121
Message,
2222
MessageContent,
23+
PromptDetails,
2324
TextMessageContent,
2425
TokenCount,
2526
Tool,
@@ -78,6 +79,7 @@
7879
"ImageMessageContent",
7980
"Message",
8081
"MessageContent",
82+
"PromptDetails",
8183
"TextMessageContent",
8284
"TokenCount",
8385
"Tool",

python/openinference-instrumentation/src/openinference/instrumentation/_attributes.py

Lines changed: 177 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
Any,
1010
Dict,
1111
Iterator,
12+
List,
1213
Literal,
1314
Optional,
1415
Tuple,
@@ -19,19 +20,24 @@
1920
from typing_extensions import TypeGuard
2021

2122
from openinference.semconv.trace import (
23+
DocumentAttributes,
24+
EmbeddingAttributes,
2225
ImageAttributes,
2326
MessageAttributes,
2427
MessageContentAttributes,
2528
OpenInferenceLLMProviderValues,
2629
OpenInferenceLLMSystemValues,
2730
OpenInferenceMimeTypeValues,
2831
OpenInferenceSpanKindValues,
32+
RerankerAttributes,
2933
SpanAttributes,
3034
ToolAttributes,
3135
ToolCallAttributes,
3236
)
3337

3438
from ._types import (
39+
Document,
40+
Embedding,
3541
Message,
3642
OpenInferenceLLMProvider,
3743
OpenInferenceLLMSystem,
@@ -40,6 +46,7 @@
4046
TokenCount,
4147
Tool,
4248
)
49+
from .helpers import safe_json_dumps
4350

4451
pydantic: Optional[ModuleType]
4552
try:
@@ -51,6 +58,140 @@
5158
from _typeshed import DataclassInstance
5259

5360

61+
def get_reranker_attributes(
    *,
    query: Optional[str] = None,
    model_name: Optional[str] = None,
    input_documents: Optional[List[Document]] = None,
    output_documents: Optional[List[Document]] = None,
    top_k: Optional[int] = None,
) -> Dict[str, AttributeValue]:
    """
    Build the span attributes describing a reranker invocation.

    Every argument is optional. Scalar arguments that are None are omitted,
    and a document argument that is not a list is ignored. Each document is
    flattened by `_document_attributes` under the input or output document
    prefix, keyed by its position in the list.
    """
    result: Dict[str, AttributeValue] = {}
    if query is not None:
        result[RERANKER_QUERY] = query
    if model_name is not None:
        result[RERANKER_MODEL_NAME] = model_name
    if top_k is not None:
        result[RERANKER_TOP_K] = top_k

    # Input documents are flattened before output documents so the insertion
    # order of the resulting dictionary is stable.
    document_groups = (
        (RERANKER_INPUT_DOCUMENTS, input_documents),
        (RERANKER_OUTPUT_DOCUMENTS, output_documents),
    )
    for prefix, group in document_groups:
        if not isinstance(group, list):
            continue
        for position, entry in enumerate(group):
            flattened = _document_attributes(
                document=entry,
                document_index=position,
                key_prefix=prefix,
            )
            result.update(flattened)
    return result
95+
96+
97+
def get_retriever_attributes(*, documents: List[Document]) -> Dict[str, AttributeValue]:
    """
    Build the span attributes for the documents returned by a retriever.

    Each document is flattened by `_document_attributes` under the retrieval
    documents prefix, keyed by its position in the list. An empty dictionary
    is returned when `documents` is not a list.
    """
    result: Dict[str, AttributeValue] = {}
    if isinstance(documents, list):
        for position, entry in enumerate(documents):
            flattened = _document_attributes(
                document=entry,
                document_index=position,
                key_prefix=RETRIEVAL_DOCUMENTS,
            )
            result.update(flattened)
    return result
110+
111+
112+
def _document_attributes(
    *,
    document: Document,
    document_index: int,
    key_prefix: str,
) -> Iterator[Tuple[str, AttributeValue]]:
    """
    Yield the flattened span attributes for a single document.

    Keys have the form ``{key_prefix}.{document_index}.{field}``. Only the
    "content", "id", "metadata" and "score" entries are read, and an entry
    whose value is None is skipped.

    Args:
        document: The document mapping; nothing is yielded if it is not a dict.
        document_index: Position of the document within its list (callers
            pass the index produced by `enumerate`).
        key_prefix: Attribute prefix identifying the document collection.

    Yields:
        (attribute key, attribute value) pairs.
    """
    if not isinstance(document, dict):
        return
    if (content := document.get("content")) is not None:
        yield f"{key_prefix}.{document_index}.{DOCUMENT_CONTENT}", content
    if (document_id := document.get("id")) is not None:
        yield f"{key_prefix}.{document_index}.{DOCUMENT_ID}", document_id
    if (metadata := document.get("metadata")) is not None:
        key = f"{key_prefix}.{document_index}.{DOCUMENT_METADATA}"
        serialized_metadata: str
        if isinstance(metadata, str):
            # Strings are passed through unchanged rather than re-serialized.
            serialized_metadata = metadata
        else:
            serialized_metadata = safe_json_dumps(metadata)
        yield key, serialized_metadata
    if (score := document.get("score")) is not None:
        # Must be `yield`: a `return <value>` inside a generator only sets
        # StopIteration.value, so the score would never reach the caller.
        yield f"{key_prefix}.{document_index}.{DOCUMENT_SCORE}", score
134+
135+
136+
def get_embedding_attributes(
    *,
    model_name: Optional[str] = None,
    embeddings: Optional[List[Embedding]] = None,
) -> Dict[str, AttributeValue]:
    """
    Build the span attributes describing an embedding invocation.

    The model name is recorded when it is not None. When `embeddings` is a
    list, the "text" and "vector" entries of each item are recorded under a
    key indexed by the item's position; entries that are missing or None are
    skipped.
    """
    result: Dict[str, AttributeValue] = {}
    if model_name is not None:
        result[EMBEDDING_MODEL_NAME] = model_name
    if not isinstance(embeddings, list):
        return result
    for position, item in enumerate(embeddings):
        text = item.get("text")
        if text is not None:
            result[f"{EMBEDDING_EMBEDDINGS}.{position}.{EMBEDDING_TEXT}"] = text
        vector = item.get("vector")
        if vector is not None:
            result[f"{EMBEDDING_EMBEDDINGS}.{position}.{EMBEDDING_VECTOR}"] = vector
    return result
153+
154+
155+
def get_context_attributes(
    *,
    session_id: Optional[str] = None,
    user_id: Optional[str] = None,
    metadata: Optional[Union[str, Dict[str, Any]]] = None,
    tags: Optional[List[str]] = None,
) -> Dict[str, AttributeValue]:
    """
    Build the combined session, user, metadata and tag span attributes.

    Each argument that is not None is delegated to its dedicated helper, and
    the resulting dictionaries are merged in the order session, user,
    metadata, tags. Arguments left as None contribute nothing.
    """
    parts: List[Dict[str, AttributeValue]] = []
    if session_id is not None:
        parts.append(get_session_attributes(session_id=session_id))
    if user_id is not None:
        parts.append(get_user_id_attributes(user_id=user_id))
    if metadata is not None:
        parts.append(get_metadata_attributes(metadata=metadata))
    if tags is not None:
        parts.append(get_tag_attributes(tags=tags))

    merged: Dict[str, AttributeValue] = {}
    for part in parts:
        merged.update(part)
    return merged
172+
173+
174+
def get_session_attributes(*, session_id: str) -> Dict[str, AttributeValue]:
    """Return a single-entry dictionary mapping the session ID attribute to `session_id`."""
    result: Dict[str, AttributeValue] = {}
    result[SESSION_ID] = session_id
    return result
176+
177+
178+
def get_tag_attributes(*, tags: List[str]) -> Dict[str, AttributeValue]:
    """Return a single-entry dictionary mapping the tags attribute to `tags`."""
    result: Dict[str, AttributeValue] = {}
    result[TAG_TAGS] = tags
    return result
180+
181+
182+
def get_metadata_attributes(*, metadata: Union[str, Dict[str, Any]]) -> Dict[str, AttributeValue]:
    """
    Return a single-entry dictionary holding the metadata attribute.

    A string is stored unchanged; any other value is serialized with
    `safe_json_dumps` first.
    """
    payload: str = metadata if isinstance(metadata, str) else safe_json_dumps(metadata)
    return {METADATA: payload}
189+
190+
191+
def get_user_id_attributes(*, user_id: str) -> Dict[str, AttributeValue]:
    """Return a single-entry dictionary mapping the user ID attribute to `user_id`."""
    result: Dict[str, AttributeValue] = {}
    result[USER_ID] = user_id
    return result
193+
194+
54195
def get_span_kind_attributes(kind: "OpenInferenceSpanKind", /) -> Dict[str, AttributeValue]:
55196
normalized_kind = _normalize_openinference_span_kind(kind)
56197
return {
@@ -350,6 +491,14 @@ def get_llm_token_count_attributes(
350491
attributes[LLM_TOKEN_COUNT_COMPLETION] = completion
351492
if (total := token_count.get("total")) is not None:
352493
attributes[LLM_TOKEN_COUNT_TOTAL] = total
494+
if (prompt_details := token_count.get("prompt_details")) is not None:
495+
if isinstance(prompt_details, dict):
496+
if (cache_write := prompt_details.get("cache_write")) is not None:
497+
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = cache_write
498+
if (cache_read := prompt_details.get("cache_read")) is not None:
499+
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = cache_read
500+
if (audio := prompt_details.get("audio")) is not None:
501+
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO] = audio
353502
return attributes
354503

355504

@@ -371,25 +520,41 @@ def get_llm_tool_attributes(
371520
return attributes
372521

373522

523+
# document attributes
524+
DOCUMENT_CONTENT = DocumentAttributes.DOCUMENT_CONTENT
525+
DOCUMENT_ID = DocumentAttributes.DOCUMENT_ID
526+
DOCUMENT_METADATA = DocumentAttributes.DOCUMENT_METADATA
527+
DOCUMENT_SCORE = DocumentAttributes.DOCUMENT_SCORE
528+
529+
# embedding attributes
530+
EMBEDDING_TEXT = EmbeddingAttributes.EMBEDDING_TEXT
531+
EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR
532+
374533
# image attributes
375534
IMAGE_URL = ImageAttributes.IMAGE_URL
376535

377-
378536
# message attributes
379537
MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
380538
MESSAGE_CONTENTS = MessageAttributes.MESSAGE_CONTENTS
381539
MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
382540
MESSAGE_TOOL_CALL_ID = MessageAttributes.MESSAGE_TOOL_CALL_ID
383541
MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS
384542

385-
386543
# message content attributes
387544
MESSAGE_CONTENT_IMAGE = MessageContentAttributes.MESSAGE_CONTENT_IMAGE
388545
MESSAGE_CONTENT_TEXT = MessageContentAttributes.MESSAGE_CONTENT_TEXT
389546
MESSAGE_CONTENT_TYPE = MessageContentAttributes.MESSAGE_CONTENT_TYPE
390547

548+
# reranker attributes
549+
RERANKER_INPUT_DOCUMENTS = RerankerAttributes.RERANKER_INPUT_DOCUMENTS
550+
RERANKER_MODEL_NAME = RerankerAttributes.RERANKER_MODEL_NAME
551+
RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS
552+
RERANKER_QUERY = RerankerAttributes.RERANKER_QUERY
553+
RERANKER_TOP_K = RerankerAttributes.RERANKER_TOP_K
391554

392555
# span attributes
556+
EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS
557+
EMBEDDING_MODEL_NAME = SpanAttributes.EMBEDDING_MODEL_NAME
393558
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
394559
INPUT_VALUE = SpanAttributes.INPUT_VALUE
395560
LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
@@ -400,20 +565,28 @@ def get_llm_tool_attributes(
400565
LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
401566
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
402567
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
568+
LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
569+
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
570+
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
571+
SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
572+
)
403573
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
404574
LLM_TOOLS = SpanAttributes.LLM_TOOLS
575+
METADATA = SpanAttributes.METADATA
405576
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
406577
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
407578
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
579+
RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
580+
SESSION_ID = SpanAttributes.SESSION_ID
581+
TAG_TAGS = SpanAttributes.TAG_TAGS
408582
TOOL_DESCRIPTION = SpanAttributes.TOOL_DESCRIPTION
409583
TOOL_NAME = SpanAttributes.TOOL_NAME
410584
TOOL_PARAMETERS = SpanAttributes.TOOL_PARAMETERS
411-
585+
USER_ID = SpanAttributes.USER_ID
412586

413587
# tool attributes
414588
TOOL_JSON_SCHEMA = ToolAttributes.TOOL_JSON_SCHEMA
415589

416-
417590
# tool call attributes
418591
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
419592
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME

python/openinference-instrumentation/src/openinference/instrumentation/_types.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from collections.abc import Sequence
2-
from typing import Any, Dict, Literal, TypedDict, Union
2+
from typing import Any, Dict, List, Literal, TypedDict, Union
33

44
from typing_extensions import Required, TypeAlias
55

@@ -68,11 +68,30 @@ class Message(TypedDict, total=False):
6868
tool_calls: "Sequence[ToolCall]"
6969

7070

71+
class PromptDetails(TypedDict, total=False):
    """
    Optional breakdown of prompt token counts.

    Declared with ``total=False``, so every key may be omitted.
    """

    # Presumably the number of prompt tokens of each category; confirm the
    # exact meaning against the semantic conventions.
    audio: int
    cache_read: int
    cache_write: int
75+
76+
7177
class TokenCount(TypedDict, total=False):
    """
    Token counts for an LLM invocation.

    Declared with ``total=False``, so every key may be omitted.
    """

    prompt: int
    completion: int
    total: int
    # Nested breakdown of the prompt tokens; see PromptDetails for its keys.
    prompt_details: PromptDetails
7582

7683

7784
class Tool(TypedDict, total=False):
    """Description of a tool, given by its JSON schema."""

    # Marked Required, so this key must be present even though the class is
    # declared with total=False. Accepts a string or a dictionary.
    json_schema: Required[Union[str, Dict[str, Any]]]
86+
87+
88+
class Embedding(TypedDict, total=False):
    """
    A single embedding entry.

    Declared with ``total=False``, so both keys may be omitted.
    """

    # Presumably the text that was embedded; confirm against callers.
    text: str
    vector: List[float]
91+
92+
93+
class Document(TypedDict, total=False):
    """
    A document handled by a retriever or reranker.

    Declared with ``total=False``, so every key may be omitted.
    """

    content: str
    id: Union[str, int]
    # Either an already-serialized string or a dictionary.
    metadata: Union[str, Dict[str, Any]]
    score: float

python/openinference-instrumentation/tests/test_manual_instrumentation.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
ImageMessageContent,
3838
Message,
3939
OITracer,
40+
PromptDetails,
4041
TextMessageContent,
4142
TokenCount,
4243
Tool,
@@ -2235,7 +2236,16 @@ def test_get_llm_attributes_returns_expected_attributes() -> None:
22352236
contents=[TextMessageContent(type="text", text="Hi there!")],
22362237
)
22372238
]
2238-
token_count: TokenCount = TokenCount(prompt=10, completion=5, total=15)
2239+
token_count: TokenCount = TokenCount(
2240+
prompt=10,
2241+
completion=5,
2242+
total=15,
2243+
prompt_details=PromptDetails(
2244+
audio=3,
2245+
cache_read=2,
2246+
cache_write=1,
2247+
),
2248+
)
22392249
tools: Sequence[Tool] = [
22402250
Tool(
22412251
json_schema=json.dumps({"type": "object", "properties": {"query": {"type": "string"}}})
@@ -2252,7 +2262,6 @@ def test_get_llm_attributes_returns_expected_attributes() -> None:
22522262
token_count=token_count,
22532263
tools=tools,
22542264
)
2255-
22562265
assert attributes.pop(LLM_PROVIDER) == "openai"
22572266
assert attributes.pop(LLM_SYSTEM) == "openai"
22582267
assert attributes.pop(LLM_MODEL_NAME) == "gpt-4"
@@ -2319,6 +2328,9 @@ def test_get_llm_attributes_returns_expected_attributes() -> None:
23192328
== "Hi there!"
23202329
)
23212330
assert attributes.pop(LLM_TOKEN_COUNT_PROMPT) == 10
2331+
assert attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO) == 3
2332+
assert attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ) == 2
2333+
assert attributes.pop(LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE) == 1
23222334
assert attributes.pop(LLM_TOKEN_COUNT_COMPLETION) == 5
23232335
assert attributes.pop(LLM_TOKEN_COUNT_TOTAL) == 15
23242336
assert (
@@ -2705,6 +2717,11 @@ def example_function( # type: ignore[no-untyped-def]
27052717
LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
27062718
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
27072719
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
2720+
LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
2721+
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
2722+
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
2723+
SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
2724+
)
27082725
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
27092726
LLM_TOOLS = SpanAttributes.LLM_TOOLS
27102727
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND

0 commit comments

Comments
 (0)