From eeea180cfb6d1fda9eb36b8eaf7cb0970f42d35e Mon Sep 17 00:00:00 2001 From: Roman Isecke Date: Thu, 2 Jan 2025 16:37:37 -0500 Subject: [PATCH] Fix support for file data models --- CHANGELOG.md | 4 ++++ test/api/test_api.py | 22 +++++++++---------- test/assets/dataclass_response.py | 10 ++++++++- unstructured_platform_plugins/__version__.py | 2 +- .../etl_uvicorn/api_generator.py | 6 +++++ 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caee205..f8047ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.0.16 + +* **Bugfix for file data deserialization** + ## 0.0.15 * **Bugfix for file data serialization** diff --git a/test/api/test_api.py b/test/api/test_api.py index 9792bce..fcc06b1 100644 --- a/test/api/test_api.py +++ b/test/api/test_api.py @@ -38,9 +38,15 @@ def generic_validation(self): ] -@pytest.mark.parametrize( - "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data] -) +@pytest.fixture +def file_data() -> FileData: + return FileData( + identifier="mock file data", + connector_type="CON", + source_identifiers=SourceIdentifiers(filename="n", fullpath="n"), + ) + + def test_async_sample_function(file_data): from test.assets.async_typed_dict_response import async_sample_function as test_fn @@ -56,9 +62,6 @@ def test_async_sample_function(file_data): assert output == {"response": {"a_out": 1, "b_out": 2}} -@pytest.mark.parametrize( - "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data] -) def test_dataclass_response(file_data): from test.assets.dataclass_response import sample_function_with_path as test_fn @@ -78,12 +81,10 @@ def test_dataclass_response(file_data): "resolved": str(current_path.resolve()), "b": "2", "c": 1, + "p": not isinstance(file_data, BatchFileData), } -@pytest.mark.parametrize( - "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data] -) def test_empty_input_and_output(file_data): from test.assets.empty_input_and_response import SampleClass as TestClass @@ -98,9 +99,6 @@ def test_empty_input_and_output(file_data): assert not output -@pytest.mark.parametrize( - "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data] -) def test_filedata_meta(file_data): from test.assets.filedata_meta import Input from test.assets.filedata_meta import process_input as test_fn diff --git a/test/assets/dataclass_response.py b/test/assets/dataclass_response.py index fb40e04..03e059e 100644 --- a/test/assets/dataclass_response.py +++ b/test/assets/dataclass_response.py @@ -2,6 +2,8 @@ from pathlib import Path from typing import Any, Optional, TypedDict +from unstructured_ingest.v2.interfaces import BatchFileData, FileData + class SampleFunctionResponse(TypedDict): response: dict[str, Any] @@ -20,10 +22,11 @@ class SampleFunctionWithPathResponse: resolved: str b: str c: int + p: bool def sample_function_with_path( - b: str, c: int, a: Optional[Path] = None + file_data: FileData, b: str, c: int, a: Optional[Path] = None ) -> SampleFunctionWithPathResponse: s: list[Any] = [type(a).__name__, f"[exists: {a.exists()}]", a.resolve()] if a else [] s.extend([b, c]) @@ -33,5 +36,10 @@ def sample_function_with_path( "resolved": a.resolve(), "b": b, "c": c, + "p": ( + False + if isinstance(file_data, BatchFileData) + else file_data.source_identifiers.relative_path is not None + ), } return SampleFunctionWithPathResponse(**resp) diff --git a/unstructured_platform_plugins/__version__.py b/unstructured_platform_plugins/__version__.py index 5864351..fe2619d 100644 --- a/unstructured_platform_plugins/__version__.py +++ b/unstructured_platform_plugins/__version__.py @@ -1 +1 @@ -__version__ = "0.0.15" # pragma: no cover +__version__ = "0.0.16" # pragma: no cover diff --git a/unstructured_platform_plugins/etl_uvicorn/api_generator.py b/unstructured_platform_plugins/etl_uvicorn/api_generator.py index b95e009..dd1e503 100644 --- a/unstructured_platform_plugins/etl_uvicorn/api_generator.py +++ b/unstructured_platform_plugins/etl_uvicorn/api_generator.py @@ -11,6 +11,7 @@ from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor from pydantic import BaseModel, Field, create_model from starlette.responses import RedirectResponse +from unstructured_ingest.v2.interfaces.file_data import file_data_from_dict from uvicorn.config import LOG_LEVELS from uvicorn.importer import import_from_string @@ -200,6 +201,11 @@ async def run_job(request: input_schema_model) -> ResponseType: log_func_and_body(func=func, body=request.json()) # Create dictionary from pydantic model while preserving underlying types request_dict = {f: getattr(request, f) for f in request.model_fields} + # Make sure nested classes get instantiated correctly + if "file_data" in request_dict: + request_dict["file_data"] = file_data_from_dict( + request_dict["file_data"].model_dump() + ) map_inputs(func=func, raw_inputs=request_dict) if logger.level == LOG_LEVELS.get("trace", logging.NOTSET): logger.log(level=logger.level, msg=f"passing inputs to function: {request_dict}")