
feat(instrumentation): add helpers for remaining semantic conventions #1606

Open · wants to merge 2 commits into base: main
@@ -649,7 +649,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
"version": "3.9.18"
}
},
"nbformat": 4,
20 changes: 10 additions & 10 deletions python/openinference-instrumentation/examples/tracer.ipynb
@@ -722,31 +722,31 @@
" }\n",
"\n",
"\n",
"def to_oi_message(message: AnthropicMessageParam) -> OIMessage:\n",
"def to_oi_message(message: AnthropicMessageParam) -> oi.Message:\n",
" role = message[\"role\"]\n",
" content = message[\"content\"]\n",
" if isinstance(content, str):\n",
" return OIMessage(role=role, content=content)\n",
" return oi.Message(role=role, content=content)\n",
"\n",
" contents: List[OIMessageContent] = []\n",
" contents: List[oi.MessageContent] = []\n",
" for content_block in content:\n",
" if not isinstance(content_block, dict):\n",
" raise NotImplementedError(\"Only typed dict message params are supported\")\n",
" if (content_type := content_block[\"type\"]) == \"text\":\n",
" assert isinstance(text := content_block.get(\"text\"), str)\n",
" contents.append(OITextMessageContent(type=\"text\", text=text))\n",
" contents.append(oi.TextMessageContent(type=\"text\", text=text))\n",
" elif content_type == \"image\":\n",
" assert isinstance(source := content_block.get(\"source\"), dict)\n",
" assert isinstance(url := source.get(\"url\"), str)\n",
" contents.append(\n",
" OIImageMessageContent(\n",
" oi.ImageMessageContent(\n",
" type=\"image\",\n",
" image=OIImage(url=url),\n",
" image=oi.Image(url=url),\n",
" )\n",
" )\n",
" else:\n",
" raise NotImplementedError(\"Only text and image message content blocks are supported\")\n",
" return OIMessage(role=role, contents=contents)\n",
" return oi.Message(role=role, contents=contents)\n",
"\n",
"\n",
"def process_output(message: AnthropicMessage) -> Dict[str, Any]:\n",
@@ -815,7 +815,7 @@
"def get_attributes_from_generator_outputs(outputs: List[ChatCompletionChunk]) -> Attributes:\n",
" role: Optional[str] = None\n",
" content = \"\"\n",
" oi_token_count = OITokenCount()\n",
" oi_token_count = oi.TokenCount()\n",
" for chunk in outputs:\n",
" if choices := chunk.choices:\n",
" assert len(choices) == 1\n",
@@ -831,7 +831,7 @@
" oi_token_count[\"completion\"] = completion_tokens\n",
" oi_messages = []\n",
" if role and content:\n",
" oi_messages.append(OIMessage(role=role, content=content))\n",
" oi_messages.append(oi.Message(role=role, content=content))\n",
" return {\n",
" **get_llm_attributes(\n",
" output_messages=oi_messages,\n",
@@ -1475,7 +1475,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
"version": "3.9.18"
}
},
"nbformat": 4,
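
Note: the notebook changes above replace the individually aliased OpenInference types (OIMessage, OITextMessageContent, and so on) with module-qualified names such as oi.Message. A minimal sketch of that pattern follows; the import path and alias are assumptions inferred from the `oi.` prefix in the diff and the `__all__` entries added below.

```python
# Sketch only: import path and alias are assumed, not shown in the diff.
import openinference.instrumentation as oi

message = oi.Message(
    role="user",
    contents=[
        oi.TextMessageContent(type="text", text="Describe this image."),
        oi.ImageMessageContent(
            type="image",
            image=oi.Image(url="https://example.com/cat.png"),  # hypothetical URL
        ),
    ],
)
```
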
@@ -20,6 +20,7 @@
ImageMessageContent,
Message,
MessageContent,
PromptDetails,
TextMessageContent,
TokenCount,
Tool,
@@ -78,6 +79,7 @@
"ImageMessageContent",
"Message",
"MessageContent",
"PromptDetails",
"TextMessageContent",
"TokenCount",
"Tool",
@@ -9,6 +9,7 @@
Any,
Dict,
Iterator,
List,
Literal,
Optional,
Tuple,
@@ -19,19 +20,24 @@
from typing_extensions import TypeGuard

from openinference.semconv.trace import (
DocumentAttributes,
EmbeddingAttributes,
ImageAttributes,
MessageAttributes,
MessageContentAttributes,
OpenInferenceLLMProviderValues,
OpenInferenceLLMSystemValues,
OpenInferenceMimeTypeValues,
OpenInferenceSpanKindValues,
RerankerAttributes,
SpanAttributes,
ToolAttributes,
ToolCallAttributes,
)

from ._types import (
Document,
Embedding,
Message,
OpenInferenceLLMProvider,
OpenInferenceLLMSystem,
@@ -40,6 +46,7 @@
TokenCount,
Tool,
)
from .helpers import safe_json_dumps

pydantic: Optional[ModuleType]
try:
@@ -51,6 +58,140 @@
from _typeshed import DataclassInstance


def get_reranker_attributes(
*,
query: Optional[str] = None,
model_name: Optional[str] = None,
input_documents: Optional[List[Document]] = None,
output_documents: Optional[List[Document]] = None,
top_k: Optional[int] = None,
) -> Dict[str, AttributeValue]:
Comment on lines +61 to +68 (Contributor): add docstrings
attributes: Dict[str, AttributeValue] = {}
if query is not None:
attributes[RERANKER_QUERY] = query
if model_name is not None:
attributes[RERANKER_MODEL_NAME] = model_name
if top_k is not None:
attributes[RERANKER_TOP_K] = top_k
if isinstance(input_documents, list):
for index, document in enumerate(input_documents):
attributes.update(
_document_attributes(
document=document,
document_index=index,
key_prefix=RERANKER_INPUT_DOCUMENTS,
)
)
if isinstance(output_documents, list):
for index, document in enumerate(output_documents):
attributes.update(
_document_attributes(
document=document,
document_index=index,
key_prefix=RERANKER_OUTPUT_DOCUMENTS,
)
)
return attributes
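
Note: for illustration, a hypothetical call to the new reranker helper. The document dicts below use only the keys read by `_document_attributes` (content, id, metadata, score); the query, model name, and contents are placeholders.

```python
attributes = get_reranker_attributes(
    query="What is OpenInference?",
    model_name="example-reranker",  # placeholder
    input_documents=[
        {"id": "doc-1", "content": "OpenInference defines tracing semantic conventions.", "score": 0.2},
        {"id": "doc-2", "content": "An unrelated paragraph.", "score": 0.1},
    ],
    output_documents=[
        {"id": "doc-1", "content": "OpenInference defines tracing semantic conventions.", "score": 0.9},
    ],
    top_k=1,
)
# Each document is flattened under RERANKER_INPUT_DOCUMENTS / RERANKER_OUTPUT_DOCUMENTS
# with its list index, e.g. f"{RERANKER_INPUT_DOCUMENTS}.0.{DOCUMENT_CONTENT}".
```
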


def get_retriever_attributes(*, documents: List[Document]) -> Dict[str, AttributeValue]:
attributes: Dict[str, AttributeValue] = {}
if not isinstance(documents, list):
return attributes
for index, document in enumerate(documents):
attributes.update(
_document_attributes(
document=document,
document_index=index,
key_prefix=RETRIEVAL_DOCUMENTS,
)
)
return attributes
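
Note: a similarly hypothetical call to the retriever helper; the document values are placeholders, and non-string metadata is serialized by `_document_attributes` via `safe_json_dumps`.

```python
attributes = get_retriever_attributes(
    documents=[
        {
            "id": "doc-1",
            "content": "First retrieved chunk.",
            "score": 0.87,
            "metadata": {"source": "faq.md"},  # serialized with safe_json_dumps
        },
    ]
)
```
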


def _document_attributes(
*,
document: Document,
document_index: int,
key_prefix: str,
) -> Iterator[Tuple[str, AttributeValue]]:
if not isinstance(document, dict):
return
if (content := document.get("content")) is not None:
yield f"{key_prefix}.{document_index}.{DOCUMENT_CONTENT}", content
if (document_id := document.get("id")) is not None:
yield f"{key_prefix}.{document_index}.{DOCUMENT_ID}", document_id
if (metadata := document.get("metadata")) is not None:
key = f"{key_prefix}.{document_index}.{DOCUMENT_METADATA}"
serialized_metadata: str
if isinstance(metadata, str):
serialized_metadata = metadata
else:
serialized_metadata = safe_json_dumps(metadata)
yield key, serialized_metadata
if (score := document.get("score")) is not None:
return f"{key_prefix}.{document_index}.{DOCUMENT_SCORE}", score


def get_embedding_attributes(
*,
model_name: Optional[str] = None,
embeddings: Optional[List[Embedding]] = None,
) -> Dict[str, AttributeValue]:
attributes: Dict[str, AttributeValue] = {}
if model_name is not None:
attributes[EMBEDDING_MODEL_NAME] = model_name
if isinstance(embeddings, list):
for index, embedding in enumerate(embeddings):
if (text := embedding.get("text")) is not None:
key = f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_TEXT}"
attributes[key] = text
if (vector := embedding.get("vector")) is not None:
key = f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_VECTOR}"
attributes[key] = vector
return attributes
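
Note: for illustration, a hypothetical call to the embedding helper. The model name and vector are placeholders; the dict keys match the lookups above (text, vector).

```python
attributes = get_embedding_attributes(
    model_name="example-embedding-model",  # placeholder
    embeddings=[
        {"text": "hello world", "vector": [0.1, 0.2, 0.3]},
    ],
)
# Produces EMBEDDING_MODEL_NAME plus indexed keys such as
# f"{EMBEDDING_EMBEDDINGS}.0.{EMBEDDING_TEXT}".
```
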


def get_context_attributes(
*,
session_id: Optional[str] = None,
user_id: Optional[str] = None,
metadata: Optional[Union[str, Dict[str, Any]]] = None,
tags: Optional[List[str]] = None,
) -> Dict[str, AttributeValue]:
attributes: Dict[str, AttributeValue] = {}
if session_id is not None:
attributes.update(get_session_attributes(session_id=session_id))
if user_id is not None:
attributes.update(get_user_id_attributes(user_id=user_id))
if metadata is not None:
attributes.update(get_metadata_attributes(metadata=metadata))
if tags is not None:
attributes.update(get_tag_attributes(tags=tags))
return attributes


def get_session_attributes(*, session_id: str) -> Dict[str, AttributeValue]:
return {SESSION_ID: session_id}


def get_tag_attributes(*, tags: List[str]) -> Dict[str, AttributeValue]:
return {TAG_TAGS: tags}


def get_metadata_attributes(*, metadata: Union[str, Dict[str, Any]]) -> Dict[str, AttributeValue]:
serialized_metadata: str
if isinstance(metadata, str):
serialized_metadata = metadata
else:
serialized_metadata = safe_json_dumps(metadata)
return {METADATA: serialized_metadata}


def get_user_id_attributes(*, user_id: str) -> Dict[str, AttributeValue]:
return {USER_ID: user_id}
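
Note: a hypothetical call to get_context_attributes, which merges the session, user, metadata, and tag helpers defined above; all values are placeholders.

```python
attributes = get_context_attributes(
    session_id="session-123",
    user_id="user-456",
    metadata={"experiment": "A"},  # dicts are serialized with safe_json_dumps
    tags=["demo", "reranker"],
)
```
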


def get_span_kind_attributes(kind: "OpenInferenceSpanKind", /) -> Dict[str, AttributeValue]:
normalized_kind = _normalize_openinference_span_kind(kind)
return {
@@ -350,6 +491,14 @@ def get_llm_token_count_attributes(
attributes[LLM_TOKEN_COUNT_COMPLETION] = completion
if (total := token_count.get("total")) is not None:
attributes[LLM_TOKEN_COUNT_TOTAL] = total
if (prompt_details := token_count.get("prompt_details")) is not None:
if isinstance(prompt_details, dict):
if (cache_write := prompt_details.get("cache_write")) is not None:
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = cache_write
if (cache_read := prompt_details.get("cache_read")) is not None:
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = cache_read
if (audio := prompt_details.get("audio")) is not None:
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO] = audio
return attributes
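
Note: for illustration, a hypothetical token_count payload exercising the new prompt_details handling. This assumes the helper accepts a token_count argument, as the body above suggests; the nested keys mirror the lookups above (cache_write, cache_read, audio) and the counts are made up.

```python
token_count = {
    "prompt": 120,
    "completion": 30,
    "total": 150,
    "prompt_details": {
        "cache_read": 100,   # prompt tokens served from cache
        "cache_write": 20,   # prompt tokens written to cache
        "audio": 0,
    },
}
attributes = get_llm_token_count_attributes(token_count=token_count)
```
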


@@ -371,25 +520,41 @@ def get_llm_tool_attributes(
return attributes


# document attributes
DOCUMENT_CONTENT = DocumentAttributes.DOCUMENT_CONTENT
DOCUMENT_ID = DocumentAttributes.DOCUMENT_ID
DOCUMENT_METADATA = DocumentAttributes.DOCUMENT_METADATA
DOCUMENT_SCORE = DocumentAttributes.DOCUMENT_SCORE

# embedding attributes
EMBEDDING_TEXT = EmbeddingAttributes.EMBEDDING_TEXT
EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR

# image attributes
IMAGE_URL = ImageAttributes.IMAGE_URL


# message attributes
MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
MESSAGE_CONTENTS = MessageAttributes.MESSAGE_CONTENTS
MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
MESSAGE_TOOL_CALL_ID = MessageAttributes.MESSAGE_TOOL_CALL_ID
MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS


# message content attributes
MESSAGE_CONTENT_IMAGE = MessageContentAttributes.MESSAGE_CONTENT_IMAGE
MESSAGE_CONTENT_TEXT = MessageContentAttributes.MESSAGE_CONTENT_TEXT
MESSAGE_CONTENT_TYPE = MessageContentAttributes.MESSAGE_CONTENT_TYPE

# reranker attributes
RERANKER_INPUT_DOCUMENTS = RerankerAttributes.RERANKER_INPUT_DOCUMENTS
RERANKER_MODEL_NAME = RerankerAttributes.RERANKER_MODEL_NAME
RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS
RERANKER_QUERY = RerankerAttributes.RERANKER_QUERY
RERANKER_TOP_K = RerankerAttributes.RERANKER_TOP_K

# span attributes
EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS
EMBEDDING_MODEL_NAME = SpanAttributes.EMBEDDING_MODEL_NAME
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
INPUT_VALUE = SpanAttributes.INPUT_VALUE
LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
Expand All @@ -400,20 +565,28 @@ def get_llm_tool_attributes(
LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
)
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
LLM_TOOLS = SpanAttributes.LLM_TOOLS
METADATA = SpanAttributes.METADATA
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
SESSION_ID = SpanAttributes.SESSION_ID
TAG_TAGS = SpanAttributes.TAG_TAGS
TOOL_DESCRIPTION = SpanAttributes.TOOL_DESCRIPTION
TOOL_NAME = SpanAttributes.TOOL_NAME
TOOL_PARAMETERS = SpanAttributes.TOOL_PARAMETERS

USER_ID = SpanAttributes.USER_ID

# tool attributes
TOOL_JSON_SCHEMA = ToolAttributes.TOOL_JSON_SCHEMA


# tool call attributes
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME