@@ -94,10 +94,72 @@ def test_missing_span_id(self) -> None:
94
94
df = pd .DataFrame ({"name" : ["sentiment" ], "annotator_kind" : ["HUMAN" ]})
95
95
with pytest .raises (
96
96
ValueError ,
97
- match = "DataFrame must have either a 'span_id' column or a string-based index" ,
97
+ match = "DataFrame must have either a 'span_id' or 'context.span_id' column, or a string-based index" , # noqa: E501
98
98
):
99
99
_validate_dataframe (dataframe = df )
100
100
101
def test_both_span_id_columns(self) -> None:
    """Reject a DataFrame that supplies both span_id and context.span_id columns."""
    columns = {
        "name": ["sentiment"],
        "annotator_kind": ["HUMAN"],
        "span_id": ["span1"],
        "context.span_id": ["span1"],
    }
    frame = pd.DataFrame(columns)
    expected_error = "DataFrame cannot have both 'span_id' and 'context.span_id' columns"
    # Both identifier columns are present at once, which the validator must refuse.
    with pytest.raises(ValueError, match=expected_error):
        _validate_dataframe(dataframe=frame)
115
+
116
def test_valid_with_context_span_id(self) -> None:
    """Accept a DataFrame whose span identifiers live in a context.span_id column."""
    columns = {
        "name": ["sentiment"],
        "annotator_kind": ["HUMAN"],
        "context.span_id": ["span1"],
    }
    frame = pd.DataFrame(columns)
    # The test passes as long as no exception escapes the validator.
    _validate_dataframe(dataframe=frame)
126
+
127
def test_invalid_context_span_id_values(self) -> None:
    """Reject context.span_id values that are empty or whitespace-only strings."""
    columns = {
        "name": ["sentiment", "sentiment"],
        "annotator_kind": ["HUMAN", "HUMAN"],
        # One empty string and one string holding only a space.
        "context.span_id": ["", " "],
    }
    frame = pd.DataFrame(columns)
    expected_error = "context.span_id values must be non-empty strings"
    with pytest.raises(ValueError, match=expected_error):
        _validate_dataframe(dataframe=frame)
138
+
139
def test_none_context_span_id_values(self) -> None:
    """Reject a context.span_id column that contains a None entry."""
    columns = {
        "name": ["sentiment", "sentiment"],
        "annotator_kind": ["HUMAN", "HUMAN"],
        # The first row is missing its identifier; the second row is fine.
        "context.span_id": [None, "valid_id"],
    }
    frame = pd.DataFrame(columns)
    expected_error = "context.span_id values cannot be None"
    with pytest.raises(ValueError, match=expected_error):
        _validate_dataframe(dataframe=frame)
150
+
151
def test_non_string_context_span_id(self) -> None:
    """Reject a context.span_id column that contains a non-string entry."""
    columns = {
        "name": ["sentiment", "sentiment"],
        "annotator_kind": ["HUMAN", "HUMAN"],
        # The first row carries an integer where a string identifier is expected.
        "context.span_id": [123, "valid_id"],
    }
    frame = pd.DataFrame(columns)
    expected_error = "context.span_id values must be strings"
    with pytest.raises(ValueError, match=expected_error):
        _validate_dataframe(dataframe=frame)
162
+
101
163
def test_valid_with_index (self ) -> None :
102
164
"""Test validation with valid DataFrame using index as span_id."""
103
165
df = pd .DataFrame ({"name" : ["sentiment" ], "annotator_kind" : ["HUMAN" ]}, index = ["span1" ])
@@ -153,18 +215,6 @@ def test_invalid_index_values(self) -> None:
153
215
):
154
216
_validate_dataframe (dataframe = df )
155
217
156
- def test_none_span_id_values (self ) -> None :
157
- """Test validation with None values in span_id column."""
158
- df = pd .DataFrame (
159
- {
160
- "name" : ["sentiment" , "sentiment" ],
161
- "annotator_kind" : ["HUMAN" , "HUMAN" ],
162
- "span_id" : [None , "valid_id" ], # None value
163
- }
164
- )
165
- with pytest .raises (ValueError , match = "span_id values cannot be None" ):
166
- _validate_dataframe (dataframe = df )
167
-
168
218
def test_mixed_valid_invalid_values (self ) -> None :
169
219
"""Test validation with mixed valid and invalid values in columns."""
170
220
df = pd .DataFrame (
@@ -383,3 +433,34 @@ def test_invalid_score_type(self) -> None:
383
433
match = "Error processing row 0: Score value 'not_a_number' cannot be converted to float" ,
384
434
):
385
435
list (_chunk_dataframe (dataframe = df ))
436
+
437
def test_chunk_with_context_span_id(self) -> None:
    """Check that chunking exposes context.span_id values under the span_id key."""
    columns = {
        "name": ["test1", "test2"],
        "annotator_kind": ["HUMAN", "HUMAN"],
        "context.span_id": ["id1", "id2"],
        "label": ["label1", "label2"],
    }
    frame = pd.DataFrame(columns)
    batches = list(_chunk_dataframe(dataframe=frame))
    # Both rows are expected to land in a single chunk.
    assert len(batches) == 1
    only_batch = batches[0]
    for position, expected_id in enumerate(("id1", "id2")):
        assert only_batch[position]["span_id"] == expected_id
451
+
452
def test_chunk_with_both_span_id_columns(self) -> None:
    """Check that chunking fails when span_id and context.span_id are both present."""
    columns = {
        "name": ["test1", "test2"],
        "annotator_kind": ["HUMAN", "HUMAN"],
        "span_id": ["id1", "id2"],
        "context.span_id": ["id1", "id2"],
        "label": ["label1", "label2"],
    }
    frame = pd.DataFrame(columns)
    expected_error = "DataFrame cannot have both 'span_id' and 'context.span_id' columns"
    with pytest.raises(ValueError, match=expected_error):
        # Drain the iterable with list() so the chunking code actually runs.
        list(_chunk_dataframe(dataframe=frame))
0 commit comments