Skip to content

Commit 0c78e2a

Browse files
committed
retriever attributes
1 parent 6d9b34e commit 0c78e2a

File tree

2 files changed

+53
-12
lines changed

2 files changed

+53
-12
lines changed

python/openinference-instrumentation/src/openinference/instrumentation/_attributes.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from typing_extensions import TypeGuard
2121

2222
from openinference.semconv.trace import (
23+
DocumentAttributes,
2324
EmbeddingAttributes,
2425
ImageAttributes,
2526
MessageAttributes,
@@ -34,6 +35,7 @@
3435
)
3536

3637
from ._types import (
38+
Document,
3739
Embedding,
3840
Message,
3941
OpenInferenceLLMProvider,
@@ -55,23 +57,48 @@
5557
from _typeshed import DataclassInstance
5658

5759

60+
def get_retriever_attributes(*, documents: List[Document]) -> Dict[str, AttributeValue]:
    """
    Build flattened span attributes for the documents returned by a retriever.

    The document at position ``index`` contributes up to four entries, keyed
    ``{RETRIEVAL_DOCUMENTS}.{index}.<field>``, for its content, id, metadata
    and score. Fields that are missing or ``None`` are omitted.

    Args:
        documents: The retrieved documents. A value that is not a list (for
            example ``None``) yields an empty result instead of raising, in
            line with how ``get_embedding_attributes`` treats ``embeddings``.

    Returns:
        A dict mapping attribute keys to values. Metadata that is not already
        a string is serialized with ``safe_json_dumps``.
    """
    attributes: Dict[str, AttributeValue] = {}
    if not isinstance(documents, list):
        # Tolerate a missing or malformed argument rather than failing inside enumerate().
        return attributes
    for index, document in enumerate(documents):
        if not isinstance(document, dict):
            # Skip malformed entries; the index still advances so that the
            # emitted positions line up with the input list.
            continue
        if (content := document.get("content")) is not None:
            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_CONTENT}"
            attributes[key] = content
        if (document_id := document.get("id")) is not None:
            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_ID}"
            attributes[key] = document_id
        if (metadata := document.get("metadata")) is not None:
            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_METADATA}"
            # Metadata that is already a string is passed through unchanged;
            # anything else is serialized to JSON.
            serialized_metadata: str
            if isinstance(metadata, str):
                serialized_metadata = metadata
            else:
                serialized_metadata = safe_json_dumps(metadata)
            attributes[key] = serialized_metadata
        if (score := document.get("score")) is not None:
            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_SCORE}"
            attributes[key] = score
    return attributes
83+
84+
5885
def get_embedding_attributes(
    *,
    model_name: Optional[str] = None,
    embeddings: Optional[List[Embedding]] = None,
) -> Dict[str, AttributeValue]:
    """
    Build flattened span attributes describing an embedding call.

    Args:
        model_name: Name of the embedding model; omitted from the result when
            ``None``.
        embeddings: Embedding entries. Anything that is not a list contributes
            nothing. For each entry, the ``"text"`` and ``"vector"`` values are
            emitted under ``{EMBEDDING_EMBEDDINGS}.{index}.<field>`` when they
            are not ``None``.

    Returns:
        A dict mapping attribute keys to values.
    """
    result: Dict[str, AttributeValue] = {}
    if model_name is not None:
        result[EMBEDDING_MODEL_NAME] = model_name
    if not isinstance(embeddings, list):
        return result
    for index, entry in enumerate(embeddings):
        text = entry.get("text")
        if text is not None:
            result[f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_TEXT}"] = text
        vector = entry.get("vector")
        if vector is not None:
            result[f"{EMBEDDING_EMBEDDINGS}.{index}.{EMBEDDING_VECTOR}"] = vector
    return result
75102

76103

77104
def get_context_attributes(
@@ -81,16 +108,16 @@ def get_context_attributes(
81108
metadata: Optional[Union[str, Dict[str, Any]]] = None,
82109
tags: Optional[List[str]] = None,
83110
) -> Dict[str, AttributeValue]:
84-
context_attributes: Dict[str, AttributeValue] = {}
111+
attributes: Dict[str, AttributeValue] = {}
85112
if session_id is not None:
86-
context_attributes.update(get_session_attributes(session_id=session_id))
113+
attributes.update(get_session_attributes(session_id=session_id))
87114
if user_id is not None:
88-
context_attributes.update(get_user_id(user_id=user_id))
115+
attributes.update(get_user_id(user_id=user_id))
89116
if metadata is not None:
90-
context_attributes.update(get_metadata_attributes(metadata=metadata))
117+
attributes.update(get_metadata_attributes(metadata=metadata))
91118
if tags is not None:
92-
context_attributes.update(get_tag_attributes(tags=tags))
93-
return context_attributes
119+
attributes.update(get_tag_attributes(tags=tags))
120+
return attributes
94121

95122

96123
def get_session_attributes(*, session_id: str) -> Dict[str, AttributeValue]:
@@ -434,6 +461,12 @@ def get_llm_tool_attributes(
434461
return attributes
435462

436463

464+
# document attributes
465+
DOCUMENT_CONTENT = DocumentAttributes.DOCUMENT_CONTENT
466+
DOCUMENT_ID = DocumentAttributes.DOCUMENT_ID
467+
DOCUMENT_METADATA = DocumentAttributes.DOCUMENT_METADATA
468+
DOCUMENT_SCORE = DocumentAttributes.DOCUMENT_SCORE
469+
437470
# embedding attributes
438471
EMBEDDING_TEXT = EmbeddingAttributes.EMBEDDING_TEXT
439472
EMBEDDING_VECTOR = EmbeddingAttributes.EMBEDDING_VECTOR
@@ -475,6 +508,7 @@ def get_llm_tool_attributes(
475508
OPENINFERENCE_SPAN_KIND = SpanAttributes.OPENINFERENCE_SPAN_KIND
476509
OUTPUT_MIME_TYPE = SpanAttributes.OUTPUT_MIME_TYPE
477510
OUTPUT_VALUE = SpanAttributes.OUTPUT_VALUE
511+
RETRIEVAL_DOCUMENTS = SpanAttributes.RETRIEVAL_DOCUMENTS
478512
SESSION_ID = SpanAttributes.SESSION_ID
479513
TAG_TAGS = SpanAttributes.TAG_TAGS
480514
TOOL_DESCRIPTION = SpanAttributes.TOOL_DESCRIPTION

python/openinference-instrumentation/src/openinference/instrumentation/_types.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,10 @@ class Tool(TypedDict, total=False):
8181
class Embedding(TypedDict, total=False):
    """
    Dictionary describing a single embedding; every key is optional (``total=False``).
    """

    # Text value of the embedding entry (presumably the text that was embedded; confirm).
    text: str
    # Embedding vector as a list of floats.
    vector: List[float]
84+
85+
86+
class Document(TypedDict, total=False):
    """
    Dictionary describing a single document; every key is optional (``total=False``).
    """

    # Document text.
    content: str
    # Identifier, given as either a string or an integer.
    id: Union[str, int]
    # Metadata, given as either a string or a dict.
    metadata: Union[str, Dict[str, Any]]
    # Score as a float (presumably a relevance score; confirm against the semantic conventions).
    score: float

0 commit comments

Comments
 (0)