rerankers

axiomofjoy · axiomofjoy · commit bc3e5b6c9b27 · 2025-05-10T14:17:52.000-07:00
diff --git a/python/openinference-instrumentation/src/openinference/instrumentation/_attributes.py b/python/openinference-instrumentation/src/openinference/instrumentation/_attributes.py
@@ -29,6 +29,7 @@
     OpenInferenceLLMSystemValues,
     OpenInferenceMimeTypeValues,
     OpenInferenceSpanKindValues,
+    RerankerAttributes,
     SpanAttributes,
     ToolAttributes,
     ToolCallAttributes,
@@ -57,31 +58,81 @@
     from _typeshed import DataclassInstance
 
 
+def get_reranker_attributes(
+    *,
+    query: Optional[str] = None,
+    model_name: Optional[str] = None,
+    input_documents: Optional[List[Document]] = None,
+    output_documents: Optional[List[Document]] = None,
+    top_k: Optional[int] = None,
+) -> Dict[str, AttributeValue]:
+    """Build the flat span-attribute mapping for a reranker call.
+
+    Every argument is optional and keyword-only. Scalars that are ``None``
+    are left out. Each document collection is expanded through
+    ``_document_attributes`` under its reranker key prefix, indexed by list
+    position; a collection that is not a ``list`` is ignored.
+    """
+    result: Dict[str, AttributeValue] = {}
+    if query is not None:
+        result[RERANKER_QUERY] = query
+    if model_name is not None:
+        result[RERANKER_MODEL_NAME] = model_name
+    if top_k is not None:
+        result[RERANKER_TOP_K] = top_k
+    document_fields = (
+        (RERANKER_INPUT_DOCUMENTS, input_documents),
+        (RERANKER_OUTPUT_DOCUMENTS, output_documents),
+    )
+    for prefix, documents in document_fields:
+        if not isinstance(documents, list):
+            continue
+        for position, entry in enumerate(documents):
+            result.update(
+                _document_attributes(document=entry, document_index=position, key_prefix=prefix)
+            )
+    return result
+
+
 def get_retriever_attributes(*, documents: List[Document]) -> Dict[str, AttributeValue]:
     attributes: Dict[str, AttributeValue] = {}
+    if not isinstance(documents, list):  # a non-list argument produces no attributes
+        return attributes
     for index, document in enumerate(documents):
-        if not isinstance(document, dict):
-            continue
-        if (content := document.get("content")) is not None:
-            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_CONTENT}"
-            attributes[key] = content
-        if (document_id := document.get("id")) is not None:
-            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_ID}"
-            attributes[key] = document_id
-        if (metadata := document.get("metadata")) is not None:
-            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_METADATA}"
-            serialized_metadata: str
-            if isinstance(metadata, str):
-                serialized_metadata = metadata
-            else:
-                serialized_metadata = safe_json_dumps(metadata)
-            attributes[key] = serialized_metadata
-        if (score := document.get("score")) is not None:
-            key = f"{RETRIEVAL_DOCUMENTS}.{index}.{DOCUMENT_SCORE}"
-            attributes[key] = score
+        attributes.update(  # the helper yields (key, value) pairs for this document
+            _document_attributes(
+                document=document,
+                document_index=index,
+                key_prefix=RETRIEVAL_DOCUMENTS,
+            )
+        )
     return attributes
 
 
+def _document_attributes(
+    *,
+    document: Document,
+    document_index: int,
+    key_prefix: str,
+) -> Iterator[Tuple[str, AttributeValue]]:
+    """Yield ``(key, value)`` pairs keyed ``{key_prefix}.{document_index}.{field}``.
+
+    Fields that are absent or ``None`` are skipped, non-string metadata is passed
+    through ``safe_json_dumps``, and a document that is not a dict yields nothing.
+    """
+    if not isinstance(document, dict):
+        return
+    if (content := document.get("content")) is not None:
+        yield f"{key_prefix}.{document_index}.{DOCUMENT_CONTENT}", content
+    if (document_id := document.get("id")) is not None:
+        yield f"{key_prefix}.{document_index}.{DOCUMENT_ID}", document_id
+    if (metadata := document.get("metadata")) is not None:
+        serialized = metadata if isinstance(metadata, str) else safe_json_dumps(metadata)
+        yield f"{key_prefix}.{document_index}.{DOCUMENT_METADATA}", serialized
+    if (score := document.get("score")) is not None:
+        yield f"{key_prefix}.{document_index}.{DOCUMENT_SCORE}", score
+
+
 def get_embedding_attributes(
     *,
     model_name: Optional[str] = None,
@@ -474,20 +525,24 @@ def get_llm_tool_attributes(
 # image attributes
 IMAGE_URL = ImageAttributes.IMAGE_URL
 
-
 # message attributes
 MESSAGE_CONTENT = MessageAttributes.MESSAGE_CONTENT
 MESSAGE_CONTENTS = MessageAttributes.MESSAGE_CONTENTS
 MESSAGE_ROLE = MessageAttributes.MESSAGE_ROLE
 MESSAGE_TOOL_CALL_ID = MessageAttributes.MESSAGE_TOOL_CALL_ID
 MESSAGE_TOOL_CALLS = MessageAttributes.MESSAGE_TOOL_CALLS
 
-
 # message content attributes
 MESSAGE_CONTENT_IMAGE = MessageContentAttributes.MESSAGE_CONTENT_IMAGE
 MESSAGE_CONTENT_TEXT = MessageContentAttributes.MESSAGE_CONTENT_TEXT
 MESSAGE_CONTENT_TYPE = MessageContentAttributes.MESSAGE_CONTENT_TYPE
 
+# reranker attributes
+RERANKER_INPUT_DOCUMENTS = RerankerAttributes.RERANKER_INPUT_DOCUMENTS  # module-level aliases
+RERANKER_MODEL_NAME = RerankerAttributes.RERANKER_MODEL_NAME
+RERANKER_OUTPUT_DOCUMENTS = RerankerAttributes.RERANKER_OUTPUT_DOCUMENTS
+RERANKER_QUERY = RerankerAttributes.RERANKER_QUERY
+RERANKER_TOP_K = RerankerAttributes.RERANKER_TOP_K
 
 # span attributes
 EMBEDDING_EMBEDDINGS = SpanAttributes.EMBEDDING_EMBEDDINGS
@@ -516,11 +571,9 @@ def get_llm_tool_attributes(
 TOOL_PARAMETERS = SpanAttributes.TOOL_PARAMETERS
 USER_ID = SpanAttributes.USER_ID
 
-
 # tool attributes
 TOOL_JSON_SCHEMA = ToolAttributes.TOOL_JSON_SCHEMA
 
-
 # tool call attributes
 TOOL_CALL_FUNCTION_ARGUMENTS_JSON = ToolCallAttributes.TOOL_CALL_FUNCTION_ARGUMENTS_JSON
 TOOL_CALL_FUNCTION_NAME = ToolCallAttributes.TOOL_CALL_FUNCTION_NAME