Skip to content

Commit 57b0cc6

Browse files
authored
bugfix/fix file data serialization and add unit tests (#34)
1 parent ac790ac commit 57b0cc6

File tree

15 files changed

+233
-55
lines changed

15 files changed

+233
-55
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## 0.0.15
2+
3+
* **Bugfix for file data serialization**
4+
15
## 0.0.14
26

37
* **Add support for batch file data**

requirements/cli.txt

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ idna==3.10
3232
# via anyio
3333
importlib-metadata==8.5.0
3434
# via opentelemetry-api
35-
marshmallow==3.23.1
35+
marshmallow==3.23.2
3636
# via dataclasses-json
3737
mypy-extensions==1.0.0
3838
# via typing-inspect
@@ -84,15 +84,15 @@ packaging==24.2
8484
# opentelemetry-instrumentation
8585
pandas==2.2.3
8686
# via unstructured-ingest
87-
protobuf==5.29.1
87+
protobuf==5.29.2
8888
# via
8989
# googleapis-common-protos
9090
# opentelemetry-proto
91-
pydantic==2.10.3
91+
pydantic==2.10.4
9292
# via
9393
# fastapi
9494
# unstructured-ingest
95-
pydantic-core==2.27.1
95+
pydantic-core==2.27.2
9696
# via pydantic
9797
python-dateutil==2.9.0.post0
9898
# via
@@ -122,7 +122,7 @@ typing-inspect==0.9.0
122122
# via dataclasses-json
123123
tzdata==2024.2
124124
# via pandas
125-
unstructured-ingest==0.3.10
125+
unstructured-ingest==0.3.11
126126
# via -r ./cli.in
127127
uvicorn==0.34.0
128128
# via -r ./cli.in

requirements/constraints.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
unstructured-ingest>=0.3.10
1+
unstructured-ingest>=0.3.1

requirements/lint.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pyflakes==3.2.0
3434
# via
3535
# autoflake
3636
# flake8
37-
ruff==0.8.3
37+
ruff==0.8.4
3838
# via -r ./lint.in
3939
tomli==2.2.1
4040
# via

requirements/test.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
pytest
2+
httpx

requirements/test.txt

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,25 @@
11
# This file was autogenerated by uv via the following command:
22
# uv pip compile ./test.in --output-file ./test.txt --no-strip-extras --python-version 3.10
3+
anyio==4.7.0
4+
# via httpx
5+
certifi==2024.12.14
6+
# via
7+
# httpcore
8+
# httpx
39
exceptiongroup==1.2.2
4-
# via pytest
10+
# via
11+
# anyio
12+
# pytest
13+
h11==0.14.0
14+
# via httpcore
15+
httpcore==1.0.7
16+
# via httpx
17+
httpx==0.28.1
18+
# via -r ./test.in
19+
idna==3.10
20+
# via
21+
# anyio
22+
# httpx
523
iniconfig==2.0.0
624
# via pytest
725
packaging==24.2
@@ -10,5 +28,9 @@ pluggy==1.5.0
1028
# via pytest
1129
pytest==8.3.4
1230
# via -r ./test.in
31+
sniffio==1.3.1
32+
# via anyio
1333
tomli==2.2.1
1434
# via pytest
35+
typing-extensions==4.12.2
36+
# via anyio

test/api/__init__.py

Whitespace-only changes.

test/api/test_api.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
from pathlib import Path
2+
from typing import Any, Optional
3+
4+
import pytest
5+
from fastapi.testclient import TestClient
6+
from pydantic import BaseModel
7+
from unstructured_ingest.v2.interfaces import BatchFileData, BatchItem, FileData, SourceIdentifiers
8+
9+
from unstructured_platform_plugins.etl_uvicorn.api_generator import (
10+
EtlApiException,
11+
UsageData,
12+
wrap_in_fastapi,
13+
)
14+
from unstructured_platform_plugins.schema.filedata_meta import FileDataMeta
15+
16+
17+
class InvokeResponse(BaseModel):
18+
usage: list[UsageData]
19+
status_code: int
20+
filedata_meta: FileDataMeta
21+
status_code_text: Optional[str] = None
22+
output: Optional[Any] = None
23+
24+
def generic_validation(self):
25+
assert self.status_code == 200
26+
assert not self.status_code_text
27+
28+
29+
mock_file_data = [
30+
FileData(
31+
identifier="mock file data",
32+
connector_type="CON",
33+
source_identifiers=SourceIdentifiers(filename="n", fullpath="n"),
34+
),
35+
BatchFileData(
36+
identifier="mock file data", connector_type="CON", batch_items=[BatchItem(identifier="bid")]
37+
),
38+
]
39+
40+
41+
@pytest.mark.parametrize(
42+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
43+
)
44+
def test_async_sample_function(file_data):
45+
from test.assets.async_typed_dict_response import async_sample_function as test_fn
46+
47+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
48+
49+
post_body = {"file_data": file_data.model_dump(), "content": {"a": 1, "b": 2}}
50+
resp = client.post("/invoke", json=post_body)
51+
resp_content = resp.json()
52+
invoke_response = InvokeResponse.model_validate(resp_content)
53+
invoke_response.generic_validation()
54+
output = invoke_response.output
55+
assert isinstance(output, dict)
56+
assert output == {"response": {"a_out": 1, "b_out": 2}}
57+
58+
59+
@pytest.mark.parametrize(
60+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
61+
)
62+
def test_dataclass_response(file_data):
63+
from test.assets.dataclass_response import sample_function_with_path as test_fn
64+
65+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
66+
current_path = Path(__file__)
67+
68+
post_body = {"file_data": file_data.model_dump(), "c": 1, "b": "2", "a": str(current_path)}
69+
resp = client.post("/invoke", json=post_body)
70+
resp_content = resp.json()
71+
invoke_response = InvokeResponse.model_validate(resp_content)
72+
invoke_response.generic_validation()
73+
output = invoke_response.output
74+
assert isinstance(output, dict)
75+
assert output == {
76+
"t": "PosixPath",
77+
"exists": True,
78+
"resolved": str(current_path.resolve()),
79+
"b": "2",
80+
"c": 1,
81+
}
82+
83+
84+
@pytest.mark.parametrize(
85+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
86+
)
87+
def test_empty_input_and_output(file_data):
88+
from test.assets.empty_input_and_response import SampleClass as TestClass
89+
90+
test_class = TestClass()
91+
client = TestClient(wrap_in_fastapi(func=test_class.do_nothing, plugin_id="mock_plugin"))
92+
93+
resp = client.post("/invoke", json={"file_data": file_data.model_dump()})
94+
resp_content = resp.json()
95+
invoke_response = InvokeResponse.model_validate(resp_content)
96+
invoke_response.generic_validation()
97+
output = invoke_response.output
98+
assert not output
99+
100+
101+
@pytest.mark.parametrize(
102+
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
103+
)
104+
def test_filedata_meta(file_data):
105+
from test.assets.filedata_meta import Input
106+
from test.assets.filedata_meta import process_input as test_fn
107+
108+
client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
109+
110+
post_body = {"file_data": file_data.model_dump(), "i": Input(m=15).model_dump()}
111+
resp = client.post("/invoke", json=post_body)
112+
resp_content = resp.json()
113+
invoke_response = InvokeResponse.model_validate(resp_content)
114+
invoke_response.generic_validation()
115+
filedata_meta = invoke_response.filedata_meta
116+
assert len(filedata_meta.new_records) == 15
117+
assert filedata_meta.terminate_current
118+
assert not invoke_response.output
119+
120+
121+
def test_improper_function():
122+
from test.assets.improper_function import sample_improper_function as test_fn
123+
124+
with pytest.raises(EtlApiException):
125+
TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))

test/assets/async_typed_dict_response.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from typing import Any, TypedDict
1+
from typing import Any
2+
3+
from typing_extensions import TypedDict
24

35

46
class SampleFunctionResponse(TypedDict):

test/assets/filedata_meta.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import math
2-
from typing import Optional
2+
from typing import Optional, Union
33

44
from pydantic import BaseModel
5-
from unstructured_ingest.v2.interfaces import FileData
5+
from unstructured_ingest.v2.interfaces import BatchFileData, FileData, SourceIdentifiers
66

77
from unstructured_platform_plugins.schema import FileDataMeta, NewRecord
88

@@ -15,17 +15,21 @@ class Output(BaseModel):
1515
n: float
1616

1717

18-
def process_input(i: Input, file_data: FileData, filedata_meta: FileDataMeta) -> Optional[Output]:
18+
def process_input(
19+
i: Input, file_data: Union[FileData, BatchFileData], filedata_meta: FileDataMeta
20+
) -> Optional[Output]:
1921
if i.m > 10:
2022
filedata_meta.terminate_current = True
2123
new_content = [
2224
NewRecord(
2325
file_data=FileData(
24-
identifier=str(i.m + x), connector_type=file_data.connector_type
26+
identifier=str(i.m + x),
27+
connector_type=file_data.connector_type,
28+
source_identifiers=SourceIdentifiers(filename=f"{x}.txt", fullpath=f"{x}.txt"),
2529
),
2630
contents=Output(n=float(i.m + x)),
2731
)
28-
for x in range(5)
32+
for x in range(i.m)
2933
]
3034
filedata_meta.new_records.extend(new_content)
3135
return None

0 commit comments

Comments
 (0)