Skip to content

bugfix/fix file data serialization and add unit tests #34

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.0.15

* **Bugfix for file data serialization**

## 0.0.14

* **Add support for batch file data**
Expand Down
10 changes: 5 additions & 5 deletions requirements/cli.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ idna==3.10
# via anyio
importlib-metadata==8.5.0
# via opentelemetry-api
marshmallow==3.23.1
marshmallow==3.23.2
# via dataclasses-json
mypy-extensions==1.0.0
# via typing-inspect
Expand Down Expand Up @@ -84,15 +84,15 @@ packaging==24.2
# opentelemetry-instrumentation
pandas==2.2.3
# via unstructured-ingest
protobuf==5.29.1
protobuf==5.29.2
# via
# googleapis-common-protos
# opentelemetry-proto
pydantic==2.10.3
pydantic==2.10.4
# via
# fastapi
# unstructured-ingest
pydantic-core==2.27.1
pydantic-core==2.27.2
# via pydantic
python-dateutil==2.9.0.post0
# via
Expand Down Expand Up @@ -122,7 +122,7 @@ typing-inspect==0.9.0
# via dataclasses-json
tzdata==2024.2
# via pandas
unstructured-ingest==0.3.10
unstructured-ingest==0.3.11
# via -r ./cli.in
uvicorn==0.34.0
# via -r ./cli.in
Expand Down
2 changes: 1 addition & 1 deletion requirements/constraints.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
unstructured-ingest>=0.3.10
unstructured-ingest>=0.3.1
2 changes: 1 addition & 1 deletion requirements/lint.txt
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pyflakes==3.2.0
# via
# autoflake
# flake8
ruff==0.8.3
ruff==0.8.4
# via -r ./lint.in
tomli==2.2.1
# via
Expand Down
1 change: 1 addition & 0 deletions requirements/test.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pytest
httpx
24 changes: 23 additions & 1 deletion requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
# This file was autogenerated by uv via the following command:
# uv pip compile ./test.in --output-file ./test.txt --no-strip-extras --python-version 3.10
anyio==4.7.0
# via httpx
certifi==2024.12.14
# via
# httpcore
# httpx
exceptiongroup==1.2.2
# via pytest
# via
# anyio
# pytest
h11==0.14.0
# via httpcore
httpcore==1.0.7
# via httpx
httpx==0.28.1
# via -r ./test.in
idna==3.10
# via
# anyio
# httpx
iniconfig==2.0.0
# via pytest
packaging==24.2
Expand All @@ -10,5 +28,9 @@ pluggy==1.5.0
# via pytest
pytest==8.3.4
# via -r ./test.in
sniffio==1.3.1
# via anyio
tomli==2.2.1
# via pytest
typing-extensions==4.12.2
# via anyio
Empty file added test/api/__init__.py
Empty file.
125 changes: 125 additions & 0 deletions test/api/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
from pathlib import Path
from typing import Any, Optional

import pytest
from fastapi.testclient import TestClient
from pydantic import BaseModel
from unstructured_ingest.v2.interfaces import BatchFileData, BatchItem, FileData, SourceIdentifiers

from unstructured_platform_plugins.etl_uvicorn.api_generator import (
EtlApiException,
UsageData,
wrap_in_fastapi,
)
from unstructured_platform_plugins.schema.filedata_meta import FileDataMeta


class InvokeResponse(BaseModel):
usage: list[UsageData]
status_code: int
filedata_meta: FileDataMeta
status_code_text: Optional[str] = None
output: Optional[Any] = None

def generic_validation(self):
assert self.status_code == 200
assert not self.status_code_text


mock_file_data = [
FileData(
identifier="mock file data",
connector_type="CON",
source_identifiers=SourceIdentifiers(filename="n", fullpath="n"),
),
BatchFileData(
identifier="mock file data", connector_type="CON", batch_items=[BatchItem(identifier="bid")]
),
]


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_async_sample_function(file_data):
from test.assets.async_typed_dict_response import async_sample_function as test_fn

client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))

post_body = {"file_data": file_data.model_dump(), "content": {"a": 1, "b": 2}}
resp = client.post("/invoke", json=post_body)
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
output = invoke_response.output
assert isinstance(output, dict)
assert output == {"response": {"a_out": 1, "b_out": 2}}


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_dataclass_response(file_data):
from test.assets.dataclass_response import sample_function_with_path as test_fn

client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
current_path = Path(__file__)

post_body = {"file_data": file_data.model_dump(), "c": 1, "b": "2", "a": str(current_path)}
resp = client.post("/invoke", json=post_body)
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
output = invoke_response.output
assert isinstance(output, dict)
assert output == {
"t": "PosixPath",
"exists": True,
"resolved": str(current_path.resolve()),
"b": "2",
"c": 1,
}


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_empty_input_and_output(file_data):
from test.assets.empty_input_and_response import SampleClass as TestClass

test_class = TestClass()
client = TestClient(wrap_in_fastapi(func=test_class.do_nothing, plugin_id="mock_plugin"))

resp = client.post("/invoke", json={"file_data": file_data.model_dump()})
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
output = invoke_response.output
assert not output


@pytest.mark.parametrize(
"file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
)
def test_filedata_meta(file_data):
from test.assets.filedata_meta import Input
from test.assets.filedata_meta import process_input as test_fn

client = TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))

post_body = {"file_data": file_data.model_dump(), "i": Input(m=15).model_dump()}
resp = client.post("/invoke", json=post_body)
resp_content = resp.json()
invoke_response = InvokeResponse.model_validate(resp_content)
invoke_response.generic_validation()
filedata_meta = invoke_response.filedata_meta
assert len(filedata_meta.new_records) == 15
assert filedata_meta.terminate_current
assert not invoke_response.output


def test_improper_function():
from test.assets.improper_function import sample_improper_function as test_fn

with pytest.raises(EtlApiException):
TestClient(wrap_in_fastapi(func=test_fn, plugin_id="mock_plugin"))
4 changes: 3 additions & 1 deletion test/assets/async_typed_dict_response.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Any, TypedDict
from typing import Any

from typing_extensions import TypedDict


class SampleFunctionResponse(TypedDict):
Expand Down
14 changes: 9 additions & 5 deletions test/assets/filedata_meta.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import math
from typing import Optional
from typing import Optional, Union

from pydantic import BaseModel
from unstructured_ingest.v2.interfaces import FileData
from unstructured_ingest.v2.interfaces import BatchFileData, FileData, SourceIdentifiers

from unstructured_platform_plugins.schema import FileDataMeta, NewRecord

Expand All @@ -15,17 +15,21 @@ class Output(BaseModel):
n: float


def process_input(i: Input, file_data: FileData, filedata_meta: FileDataMeta) -> Optional[Output]:
def process_input(
i: Input, file_data: Union[FileData, BatchFileData], filedata_meta: FileDataMeta
) -> Optional[Output]:
if i.m > 10:
filedata_meta.terminate_current = True
new_content = [
NewRecord(
file_data=FileData(
identifier=str(i.m + x), connector_type=file_data.connector_type
identifier=str(i.m + x),
connector_type=file_data.connector_type,
source_identifiers=SourceIdentifiers(filename=f"{x}.txt", fullpath=f"{x}.txt"),
),
contents=Output(n=float(i.m + x)),
)
for x in range(5)
for x in range(i.m)
]
filedata_meta.new_records.extend(new_content)
return None
Expand Down
64 changes: 32 additions & 32 deletions test/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,22 +449,16 @@ def fn(a: FileData) -> list[FileData]:
"identifier": {"type": "string"},
"connector_type": {"type": "string"},
"source_identifiers": {
"anyOf": [
{
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
},
"required": ["filename", "fullpath"],
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
{"type": "null"},
],
"default": None,
},
"required": ["filename", "fullpath"],
},
"metadata": {
"type": "object",
Expand Down Expand Up @@ -533,7 +527,13 @@ def fn(a: FileData) -> list[FileData]:
"default": None,
},
},
"required": ["identifier", "connector_type", "metadata", "additional_metadata"],
"required": [
"identifier",
"connector_type",
"source_identifiers",
"metadata",
"additional_metadata",
],
}
},
}
Expand All @@ -551,22 +551,16 @@ def fn(a: FileData) -> list[FileData]:
"identifier": {"type": "string"},
"connector_type": {"type": "string"},
"source_identifiers": {
"anyOf": [
{
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
},
"required": ["filename", "fullpath"],
"type": "object",
"properties": {
"filename": {"type": "string"},
"fullpath": {"type": "string"},
"rel_path": {
"anyOf": [{"type": "string"}, {"type": "null"}],
"default": None,
},
{"type": "null"},
],
"default": None,
},
"required": ["filename", "fullpath"],
},
"metadata": {
"type": "object",
Expand Down Expand Up @@ -629,7 +623,13 @@ def fn(a: FileData) -> list[FileData]:
},
"display_name": {"anyOf": [{"type": "string"}, {"type": "null"}], "default": None},
},
"required": ["identifier", "connector_type", "metadata", "additional_metadata"],
"required": [
"identifier",
"connector_type",
"source_identifiers",
"metadata",
"additional_metadata",
],
},
}

Expand Down
3 changes: 2 additions & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import pytest
from pydantic import BaseModel
from unstructured_ingest.v2.interfaces import FileData
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
from uvicorn.importer import import_from_string

from unstructured_platform_plugins.etl_uvicorn import utils
Expand Down Expand Up @@ -110,6 +110,7 @@ def fn(a: A, b: B, c: MyEnum, d: list, e: FileData) -> None:
identifier="custom_file_data",
connector_type="mock_connector",
additional_metadata={"additional": "metadata"},
source_identifiers=SourceIdentifiers(filename="file.txt", fullpath="file.txt"),
)
inputs = {
"a": {"b": 4, "c": 5.6},
Expand Down
2 changes: 1 addition & 1 deletion unstructured_platform_plugins/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.14" # pragma: no cover
__version__ = "0.0.15" # pragma: no cover
Loading
Loading