Skip to content

Commit aaf5655

Browse files
RogerHYang, GeLi2001
authored and committed
fix: allow context.span_id as column name (#7368)
1 parent 62c7f00 commit aaf5655

File tree

2 files changed

+116
-22
lines changed

2 files changed

+116
-22
lines changed

packages/phoenix-client/src/phoenix/client/resources/annotations/__init__.py

Lines changed: 22 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -625,20 +625,31 @@ def _validate_dataframe(
625625
raise ValueError(f"{name_column} values must be strings")
626626

627627
# Check for span_id in either columns or index
628-
if "span_id" not in dataframe.columns and not all(isinstance(x, str) for x in dataframe.index): # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
629-
raise ValueError("DataFrame must have either a 'span_id' column or a string-based index")
628+
has_span_id = "span_id" in dataframe.columns
629+
has_context_span_id = "context.span_id" in dataframe.columns
630+
if has_span_id and has_context_span_id:
631+
raise ValueError("DataFrame cannot have both 'span_id' and 'context.span_id' columns")
632+
if (
633+
not has_span_id
634+
and not has_context_span_id
635+
and not all(isinstance(x, str) for x in dataframe.index) # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
636+
):
637+
raise ValueError(
638+
"DataFrame must have either a 'span_id' or 'context.span_id' column, or a string-based index" # noqa: E501
639+
)
630640

631641
# Validate span_id values if using column
632-
if "span_id" in dataframe.columns:
642+
span_id_column = "context.span_id" if has_context_span_id else "span_id"
643+
if span_id_column in dataframe.columns:
633644
# Check for None values
634-
if dataframe["span_id"].isna().any(): # pyright: ignore[reportUnknownMemberType]
635-
raise ValueError("span_id values cannot be None")
645+
if dataframe[span_id_column].isna().any(): # pyright: ignore[reportUnknownMemberType]
646+
raise ValueError(f"{span_id_column} values cannot be None")
636647
# Check for empty or whitespace-only strings
637-
if (dataframe["span_id"].str.strip() == "").any(): # pyright: ignore[reportUnknownMemberType]
638-
raise ValueError("span_id values must be non-empty strings")
648+
if (dataframe[span_id_column].str.strip() == "").any(): # pyright: ignore[reportUnknownMemberType]
649+
raise ValueError(f"{span_id_column} values must be non-empty strings")
639650
# Check for non-string values
640-
if not all(isinstance(x, str) for x in dataframe["span_id"]): # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
641-
raise ValueError("span_id values must be strings")
651+
if not all(isinstance(x, str) for x in dataframe[span_id_column]): # pyright: ignore[reportUnknownVariableType,reportUnknownMemberType]
652+
raise ValueError(f"{span_id_column} values must be strings")
642653
# Validate index values if using index as span_id
643654
else:
644655
# Check for empty or whitespace-only strings
@@ -723,6 +734,8 @@ def _chunk_dataframe(
723734
span_id = (
724735
str(row["span_id"]) # pyright: ignore[reportUnknownArgumentType]
725736
if "span_id" in dataframe.columns and bool(row["span_id"]) # pyright: ignore[reportUnknownArgumentType]
737+
else str(row["context.span_id"]) # pyright: ignore[reportUnknownArgumentType]
738+
if "context.span_id" in dataframe.columns and bool(row["context.span_id"]) # pyright: ignore[reportUnknownArgumentType]
726739
else str(idx)
727740
)
728741

packages/phoenix-client/tests/client/resources/annotations/test_annotations.py

Lines changed: 94 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -94,10 +94,72 @@ def test_missing_span_id(self) -> None:
9494
df = pd.DataFrame({"name": ["sentiment"], "annotator_kind": ["HUMAN"]})
9595
with pytest.raises(
9696
ValueError,
97-
match="DataFrame must have either a 'span_id' column or a string-based index",
97+
match="DataFrame must have either a 'span_id' or 'context.span_id' column, or a string-based index", # noqa: E501
9898
):
9999
_validate_dataframe(dataframe=df)
100100

101+
def test_both_span_id_columns(self) -> None:
102+
"""Test validation when both span_id and context.span_id columns are present."""
103+
df = pd.DataFrame(
104+
{
105+
"name": ["sentiment"],
106+
"annotator_kind": ["HUMAN"],
107+
"span_id": ["span1"],
108+
"context.span_id": ["span1"],
109+
}
110+
)
111+
with pytest.raises(
112+
ValueError, match="DataFrame cannot have both 'span_id' and 'context.span_id' columns"
113+
):
114+
_validate_dataframe(dataframe=df)
115+
116+
def test_valid_with_context_span_id(self) -> None:
117+
"""Test validation with valid DataFrame using context.span_id column."""
118+
df = pd.DataFrame(
119+
{
120+
"name": ["sentiment"],
121+
"annotator_kind": ["HUMAN"],
122+
"context.span_id": ["span1"],
123+
}
124+
)
125+
_validate_dataframe(dataframe=df) # Should not raise
126+
127+
def test_invalid_context_span_id_values(self) -> None:
128+
"""Test validation with invalid context.span_id values."""
129+
df = pd.DataFrame(
130+
{
131+
"name": ["sentiment", "sentiment"],
132+
"annotator_kind": ["HUMAN", "HUMAN"],
133+
"context.span_id": ["", " "], # Empty strings
134+
}
135+
)
136+
with pytest.raises(ValueError, match="context.span_id values must be non-empty strings"):
137+
_validate_dataframe(dataframe=df)
138+
139+
def test_none_context_span_id_values(self) -> None:
140+
"""Test validation with None values in context.span_id column."""
141+
df = pd.DataFrame(
142+
{
143+
"name": ["sentiment", "sentiment"],
144+
"annotator_kind": ["HUMAN", "HUMAN"],
145+
"context.span_id": [None, "valid_id"], # None value
146+
}
147+
)
148+
with pytest.raises(ValueError, match="context.span_id values cannot be None"):
149+
_validate_dataframe(dataframe=df)
150+
151+
def test_non_string_context_span_id(self) -> None:
152+
"""Test validation with non-string values in context.span_id column."""
153+
df = pd.DataFrame(
154+
{
155+
"name": ["sentiment", "sentiment"],
156+
"annotator_kind": ["HUMAN", "HUMAN"],
157+
"context.span_id": [123, "valid_id"], # Non-string value
158+
}
159+
)
160+
with pytest.raises(ValueError, match="context.span_id values must be strings"):
161+
_validate_dataframe(dataframe=df)
162+
101163
def test_valid_with_index(self) -> None:
102164
"""Test validation with valid DataFrame using index as span_id."""
103165
df = pd.DataFrame({"name": ["sentiment"], "annotator_kind": ["HUMAN"]}, index=["span1"])
@@ -153,18 +215,6 @@ def test_invalid_index_values(self) -> None:
153215
):
154216
_validate_dataframe(dataframe=df)
155217

156-
def test_none_span_id_values(self) -> None:
157-
"""Test validation with None values in span_id column."""
158-
df = pd.DataFrame(
159-
{
160-
"name": ["sentiment", "sentiment"],
161-
"annotator_kind": ["HUMAN", "HUMAN"],
162-
"span_id": [None, "valid_id"], # None value
163-
}
164-
)
165-
with pytest.raises(ValueError, match="span_id values cannot be None"):
166-
_validate_dataframe(dataframe=df)
167-
168218
def test_mixed_valid_invalid_values(self) -> None:
169219
"""Test validation with mixed valid and invalid values in columns."""
170220
df = pd.DataFrame(
@@ -383,3 +433,34 @@ def test_invalid_score_type(self) -> None:
383433
match="Error processing row 0: Score value 'not_a_number' cannot be converted to float",
384434
):
385435
list(_chunk_dataframe(dataframe=df))
436+
437+
def test_chunk_with_context_span_id(self) -> None:
438+
"""Test chunking with context.span_id column."""
439+
df = pd.DataFrame(
440+
{
441+
"name": ["test1", "test2"],
442+
"annotator_kind": ["HUMAN", "HUMAN"],
443+
"context.span_id": ["id1", "id2"],
444+
"label": ["label1", "label2"],
445+
}
446+
)
447+
chunks = list(_chunk_dataframe(dataframe=df))
448+
assert len(chunks) == 1
449+
assert chunks[0][0]["span_id"] == "id1"
450+
assert chunks[0][1]["span_id"] == "id2"
451+
452+
def test_chunk_with_both_span_id_columns(self) -> None:
453+
"""Test chunking with both span_id and context.span_id columns."""
454+
df = pd.DataFrame(
455+
{
456+
"name": ["test1", "test2"],
457+
"annotator_kind": ["HUMAN", "HUMAN"],
458+
"span_id": ["id1", "id2"],
459+
"context.span_id": ["id1", "id2"],
460+
"label": ["label1", "label2"],
461+
}
462+
)
463+
with pytest.raises(
464+
ValueError, match="DataFrame cannot have both 'span_id' and 'context.span_id' columns"
465+
):
466+
list(_chunk_dataframe(dataframe=df))

0 commit comments

Comments
 (0)