
feat(instrumentation): add helpers for remaining semantic conventions #1606

Open · wants to merge 2 commits into base: main
@@ -649,7 +649,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
"version": "3.9.18"
}
},
"nbformat": 4,
20 changes: 10 additions & 10 deletions python/openinference-instrumentation/examples/tracer.ipynb
@@ -722,31 +722,31 @@
" }\n",
"\n",
"\n",
"def to_oi_message(message: AnthropicMessageParam) -> OIMessage:\n",
"def to_oi_message(message: AnthropicMessageParam) -> oi.Message:\n",
" role = message[\"role\"]\n",
" content = message[\"content\"]\n",
" if isinstance(content, str):\n",
" return OIMessage(role=role, content=content)\n",
" return oi.Message(role=role, content=content)\n",
"\n",
" contents: List[OIMessageContent] = []\n",
" contents: List[oi.MessageContent] = []\n",
" for content_block in content:\n",
" if not isinstance(content_block, dict):\n",
" raise NotImplementedError(\"Only typed dict message params are supported\")\n",
" if (content_type := content_block[\"type\"]) == \"text\":\n",
" assert isinstance(text := content_block.get(\"text\"), str)\n",
" contents.append(OITextMessageContent(type=\"text\", text=text))\n",
" contents.append(oi.TextMessageContent(type=\"text\", text=text))\n",
" elif content_type == \"image\":\n",
" assert isinstance(source := content_block.get(\"source\"), dict)\n",
" assert isinstance(url := source.get(\"url\"), str)\n",
" contents.append(\n",
" OIImageMessageContent(\n",
" oi.ImageMessageContent(\n",
" type=\"image\",\n",
" image=OIImage(url=url),\n",
" image=oi.Image(url=url),\n",
" )\n",
" )\n",
" else:\n",
" raise NotImplementedError(\"Only text and image message content blocks are supported\")\n",
" return OIMessage(role=role, contents=contents)\n",
" return oi.Message(role=role, contents=contents)\n",
"\n",
"\n",
"def process_output(message: AnthropicMessage) -> Dict[str, Any]:\n",
@@ -815,7 +815,7 @@
"def get_attributes_from_generator_outputs(outputs: List[ChatCompletionChunk]) -> Attributes:\n",
" role: Optional[str] = None\n",
" content = \"\"\n",
" oi_token_count = OITokenCount()\n",
" oi_token_count = oi.TokenCount()\n",
" for chunk in outputs:\n",
" if choices := chunk.choices:\n",
" assert len(choices) == 1\n",
@@ -831,7 +831,7 @@
" oi_token_count[\"completion\"] = completion_tokens\n",
" oi_messages = []\n",
" if role and content:\n",
" oi_messages.append(OIMessage(role=role, content=content))\n",
" oi_messages.append(oi.Message(role=role, content=content))\n",
" return {\n",
" **get_llm_attributes(\n",
" output_messages=oi_messages,\n",
@@ -1475,7 +1475,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.20"
"version": "3.9.18"
}
},
"nbformat": 4,
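
Note: the notebook changes above replace the individually aliased OpenInference types (OIMessage, OITextMessageContent, and so on) with module-qualified names such as oi.Message. A minimal sketch of that pattern follows; the import path and alias are assumptions inferred from the `oi.` prefix in the diff and the `__all__` entries added below.

```python
# Sketch only: import path and alias are assumed, not shown in the diff.
import openinference.instrumentation as oi

message = oi.Message(
    role="user",
    contents=[
        oi.TextMessageContent(type="text", text="Describe this image."),
        oi.ImageMessageContent(
            type="image",
            image=oi.Image(url="https://example.com/cat.png"),  # hypothetical URL
        ),
    ],
)
```
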
@@ -20,6 +20,7 @@
ImageMessageContent,
Message,
MessageContent,
PromptDetails,
TextMessageContent,
TokenCount,
Tool,
@@ -78,6 +79,7 @@
"ImageMessageContent",
"Message",
"MessageContent",
"PromptDetails",
"TextMessageContent",
"TokenCount",
"Tool",
@@ -9,6 +9,7 @@
Any,
Dict,
Iterator,
List,
Literal,
Optional,
Tuple,
@@ -19,19 +20,24 @@
from typing_extensions import TypeGuard

from openinference.semconv.trace import (
DocumentAttributes,
EmbeddingAttributes,
ImageAttributes,
MessageAttributes,
MessageContentAttributes,
OpenInferenceLLMProviderValues,
OpenInferenceLLMSystemValues,
OpenInferenceMimeTypeValues,
OpenInferenceSpanKindValues,
RerankerAttributes,
SpanAttributes,
ToolAttributes,
ToolCallAttributes,
)

from ._types import (
Document,
Embedding,
Message,
OpenInferenceLLMProvider,
OpenInferenceLLMSystem,
@@ -40,6 +46,7 @@
TokenCount,
Tool,
)
from .helpers import safe_json_dumps

pydantic: Optional[ModuleType]
try:
@@ -51,6 +58,140 @@
from _typeshed import DataclassInstance


def get_reranker_attributes(
*,
query: Optional[str] = None,
model_name: Optional[str] = None,
input_documents: Optional[List[Document]] = None,
output_documents: Optional[List[Document]] = None,
top_k: Optional[int] = None,
) -> Dict[str, AttributeValue]:
Comment on lines +61 to +68 (Contributor): add docstrings
attributes: Dict[str, AttributeValue] = {}
if query is not None:
attributes[RERANKER_QUERY] = query
if model_name is not None:
attributes[RERANKER_MODEL_NAME] = model_name
if top_k is not None:
attributes[RERANKER_TOP_K] = top_k
if isinstance(input_documents, list):
for index, document in enumerate(input_documents):
attributes.update(
_document_attributes(
document=document,
document_index=index,
key_prefix=RERANKER_INPUT_DOCUMENTS,
)
)
if isinstance(output_documents, list):
for index, document in enumerate(output_documents):
attributes.update(
_document_attributes(
document=document,
document_index=index,
key_prefix=RERANKER_OUTPUT_DOCUMENTS,
)
)
return attributes
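
Note: for illustration, a hypothetical call to the new reranker helper. The document dicts below use only the keys read by `_document_attributes` (content, id, metadata, score); the query, model name, and contents are placeholders.

```python
attributes = get_reranker_attributes(
    query="What is OpenInference?",
    model_name="example-reranker",  # placeholder
    input_documents=[
        {"id": "doc-1", "content": "OpenInference defines tracing semantic conventions.", "score": 0.2},
        {"id": "doc-2", "content": "An unrelated paragraph.", "score": 0.1},
    ],
    output_documents=[
        {"id": "doc-1", "content": "OpenInference defines tracing semantic conventions.", "score": 0.9},
    ],
    top_k=1,
)
# Each document is flattened under RERANKER_INPUT_DOCUMENTS / RERANKER_OUTPUT_DOCUMENTS
# with its list index, e.g. f"{RERANKER_INPUT_DOCUMENTS}.0.{DOCUMENT_CONTENT}".
```
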


def get_retriever_attributes(*, documents: List[Document]) -> Dict[str, AttributeValue]:
attributes: Dict[str, AttributeValue] = {}
if not isinstance(documents, list):
return attributes
for index, document in enumerate(documents):
attributes.update(
_document_attributes(
document=document,
document_index=index,
key_prefix=RETRIEVAL_DOCUMENTS,
)
)
return attributes
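
Note: a similarly hypothetical call to the retriever helper; the document values are placeholders, and non-string metadata is serialized by `_document_attributes` via `safe_json_dumps`.

```python
attributes = get_retriever_attributes(
    documents=[
        {
            "id": "doc-1",
            "content": "First retrieved chunk.",
            "score": 0.87,
            "metadata": {"source": "faq.md"},  # serialized with safe_json_dumps
        },
    ]
)
```
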


def _document_attributes(
*,
document: Document,
document_index: int,
key_prefix: str,
) -> Iterator[Tuple[str, AttributeValue]]:
if not isinstance(document, dict):
return
if (content := document.get("content")) is not None:
yield f"{key_prefix}.{document_index}.{DOCUMENT_CONTENT}", content
if (document_id := document.get("id")) is not None:
yield f"{key_prefix}.{document_index}.{DOCUMENT_ID}", document_id
if (metadata := document.get("metadata")) is not None:
key = f"{key_prefix}.{document_index}.{DOCUMENT_METADATA}"
serialized_metadata: str
if isinstance(metadata, str):
serialized_metadata = metadata
else:
serialized_metadata = safe_json_dumps(metadata)
yield key, serialized_metadata
if (score := document.get("score")) is not None:
return f"{key_prefix}.{document_index}.{DOCUMENT_SCORE}", score


def get_embedding_attributes(
*,
model_name: Optional[str] = None,
embeddings: Optional[List[Embedding]] = None,
) -> Dict[str, AttributeValue]:
attributes: Dict[str, AttributeValue] = {}
if model_name is not None:
attributes[EMBEDDING_MODEL_NAME] = model_name
if isinstance(embeddings, list):
for index, embedding in enumerate(embeddings):
if (text := embedding.get("text")) is not None:
key = f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_TEXT}"
attributes[key] = text
if (vector := embedding.get("vector")) is not None:
key = f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_VECTOR}"
attributes[key] = vector
return attributes
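
Note: for illustration, a hypothetical call to the embedding helper. The model name and vector are placeholders; the dict keys match the lookups above (text, vector).

```python
attributes = get_embedding_attributes(
    model_name="example-embedding-model",  # placeholder
    embeddings=[
        {"text": "hello world", "vector": [0.1, 0.2, 0.3]},
    ],
)
# Produces EMBEDDING_MODEL_NAME plus indexed keys such as
# f"{EMBEDDING_EMBEDDINGS}.0.{EMBEDDING_TEXT}".
```
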


def get_context_attributes(
*,
session_id: Optional[str] = None,
user_id: Optional[str] = None,
metadata: Optional[Union[str, Dict[str, Any]]] = None,
tags: Optional[List[str]] = None,
) -> Dict[str, AttributeValue]:
attributes: Dict[str, AttributeValue] = {}
if session_id is not None:
attributes.update(get_session_attributes(session_id=session_id))
if user_id is not None:
attributes.update(get_user_id_attributes(user_id=user_id))
if metadata is not None:
attributes.update(get_metadata_attributes(metadata=metadata))
if tags is not None:
attributes.update(get_tag_attributes(tags=tags))
return attributes


def get_session_attributes(*, session_id: str) -> Dict[str, AttributeValue]:
return {SESSION_ID: session_id}


def get_tag_attributes(*, tags: List[str]) -> Dict[str, AttributeValue]:
return {TAG_TAGS: tags}


def get_metadata_attributes(*, metadata: Union[str, Dict[str, Any]]) -> Dict[str, AttributeValue]:
serialized_metadata: str
if isinstance(metadata, str):
serialized_metadata = metadata
else:
serialized_metadata = safe_json_dumps(metadata)
return {METADATA: serialized_metadata}


def get_user_id_attributes(*, user_id: str) -> Dict[str, AttributeValue]:
return {USER_ID: user_id}
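
Note: a hypothetical call to get_context_attributes, which merges the session, user, metadata, and tag helpers defined above; all values are placeholders.

```python
attributes = get_context_attributes(
    session_id="session-123",
    user_id="user-456",
    metadata={"experiment": "A"},  # dicts are serialized with safe_json_dumps
    tags=["demo", "reranker"],
)
```
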


def get_span_kind_attributes(kind: "OpenInferenceSpanKind", /) -> Dict[str, AttributeValue]:
normalized_kind = _normalize_openinference_span_kind(kind)
return {
@@ -350,6 +491,14 @@ def get_llm_token_count_attributes(
attributes[LLM_TOKEN_COUNT_COMPLETION] = completion
if (total := token_count.get("total")) is not None:
attributes[LLM_TOKEN_COUNT_TOTAL] = total
if (prompt_details := token_count.get("prompt_details")) is not None:
if isinstance(prompt_details, dict):
if (cache_write := prompt_details.get("cache_write")) is not None:
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE] = cache_write
if (cache_read := prompt_details.get("cache_read")) is not None:
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ] = cache_read
if (audio := prompt_details.get("audio")) is not None:
attributes[LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO] = audio
return attributes
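
Note: for illustration, a hypothetical token_count payload exercising the new prompt_details handling. This assumes the helper accepts a token_count argument, as the body above suggests; the nested keys mirror the lookups above (cache_write, cache_read, audio) and the counts are made up.

```python
token_count = {
    "prompt": 120,
    "completion": 30,
    "total": 150,
    "prompt_details": {
        "cache_read": 100,   # prompt tokens served from cache
        "cache_write": 20,   # prompt tokens written to cache
        "audio": 0,
    },
}
attributes = get_llm_token_count_attributes(token_count=token_count)
```
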


@@ -371,25 +520,41 @@ def get_llm_tool_attributes(
return attributes


# document attributes
DOCUMENT_CONTENT = DocumentAttributes.DOCUMENT_CONTENT
DOCUMENT_ID = DocumentAttributes.DOCUMENT_ID
DOCUMENT_METADATA = DocumentAttributes.DOCUMENT_METADATA
DOCUMENT_SCORE = DocumentAttributes.DOCUMENT_SCORE

# embedding attributes
EMBEDDING_TEXT = EmbeddingAttributes.EMBEDDING_TEXT
EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR

# image attributes
IMAGE_URL = ImageAttributes.IMAGE_URL


# message attributes
MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
MESSAGE_CONTENTS = MessageAttributes.MESSAGE_CONTENTS
MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
MESSAGE_TOOL_CALL_ID = MessageAttributes.MESSAGE_TOOL_CALL_ID
MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS


# message content attributes
MESSAGE_CONTENT_IMAGE = MessageContentAttributes.MESSAGE_CONTENT_IMAGE
MESSAGE_CONTENT_TEXT = MessageContentAttributes.MESSAGE_CONTENT_TEXT
MESSAGE_CONTENT_TYPE = MessageContentAttributes.MESSAGE_CONTENT_TYPE

# reranker attributes
RERANKER_INPUT_DOCUMENTS = RerankerAttributes.RERANKER_INPUT_DOCUMENTS
RERANKER_MODEL_NAME = RerankerAttributes.RERANKER_MODEL_NAME
RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS
RERANKER_QUERY = RerankerAttributes.RERANKER_QUERY
RERANKER_TOP_K = RerankerAttributes.RERANKER_TOP_K

# span attributes
EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS
EMBEDDING_MODEL_NAME = SpanAttributes.EMBEDDING_MODEL_NAME
INPUT_MIME_TYPE = SpanAttributes.INPUT_MIME_TYPE
INPUT_VALUE = SpanAttributes.INPUT_VALUE
LLM_INPUT_MESSAGES = SpanAttributes.LLM_INPUT_MESSAGES
Expand All @@ -400,20 +565,28 @@ def get_llm_tool_attributes(
LLM_SYSTEM = SpanAttributes.LLM_SYSTEM
LLM_TOKEN_COUNT_COMPLETION = SpanAttributes.LLM_TOKEN_COUNT_COMPLETION
LLM_TOKEN_COUNT_PROMPT = SpanAttributes.LLM_TOKEN_COUNT_PROMPT
LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_AUDIO
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ = SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_READ
LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE = (
SpanAttributes.LLM_TOKEN_COUNT_PROMPT_DETAILS_CACHE_WRITE
)
LLM_TOKEN_COUNT_TOTAL = SpanAttributes.LLM_TOKEN_COUNT_TOTAL
LLM_TOOLS = SpanAttributes.LLM_TOOLS
METADATA = SpanAttributes.METADATA
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
SESSION_ID = SpanAttributes.SESSION_ID
TAG_TAGS = SpanAttributes.TAG_TAGS
TOOL_DESCRIPTION = SpanAttributes.TOOL_DESCRIPTION
TOOL_NAME = SpanAttributes.TOOL_NAME
TOOL_PARAMETERS = SpanAttributes.TOOL_PARAMETERS

USER_ID = SpanAttributes.USER_ID

# tool attributes
TOOL_JSON_SCHEMA = ToolAttributes.TOOL_JSON_SCHEMA


# tool call attributes
TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME