bugfix/Fix support for file data models (#35)

rbiseck3 · web-flow · commit 15cb34aa997d · 2025-01-02T17:13:33.000-05:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 0.0.16
+
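+* **Bugfix for file data deserialization** Incoming `file_data` is now rebuilt with `file_data_from_dict` so that nested classes get instantiated correctly.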
+
 ## 0.0.15
 
 * **Bugfix for file data serialization**
diff --git a/test/api/test_api.py b/test/api/test_api.py
@@ -38,9 +38,15 @@ def generic_validation(self):
 ]
 
 
-@pytest.mark.parametrize(
-    "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
-)
+@pytest.fixture
+def file_data() -> FileData:
+    return FileData(
+        identifier="mock file data",
+        connector_type="CON",
+        source_identifiers=SourceIdentifiers(filename="n", fullpath="n"),
+    )
+
+
 def test_async_sample_function(file_data):
     from test.assets.async_typed_dict_response import async_sample_function as test_fn
 
@@ -56,9 +62,6 @@ def test_async_sample_function(file_data):
     assert output == {"response": {"a_out": 1, "b_out": 2}}
 
 
-@pytest.mark.parametrize(
-    "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
-)
 def test_dataclass_response(file_data):
     from test.assets.dataclass_response import sample_function_with_path as test_fn
 
@@ -78,12 +81,10 @@ def test_dataclass_response(file_data):
         "resolved": str(current_path.resolve()),
         "b": "2",
         "c": 1,
+        "p": not isinstance(file_data, BatchFileData),
     }
 
 
-@pytest.mark.parametrize(
-    "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
-)
 def test_empty_input_and_output(file_data):
     from test.assets.empty_input_and_response import SampleClass as TestClass
 
@@ -98,9 +99,6 @@ def test_empty_input_and_output(file_data):
     assert not output
 
 
-@pytest.mark.parametrize(
-    "file_data", mock_file_data, ids=[type(fd).__name__ for fd in mock_file_data]
-)
 def test_filedata_meta(file_data):
     from test.assets.filedata_meta import Input
     from test.assets.filedata_meta import process_input as test_fn
diff --git a/test/assets/dataclass_response.py b/test/assets/dataclass_response.py
@@ -2,6 +2,8 @@
 from pathlib import Path
 from typing import Any, Optional, TypedDict
 
+from unstructured_ingest.v2.interfaces import BatchFileData, FileData
+
 
 class SampleFunctionResponse(TypedDict):
     response: dict[str, Any]
@@ -20,10 +22,11 @@ class SampleFunctionWithPathResponse:
     resolved: str
     b: str
     c: int
+    p: bool
 
 
 def sample_function_with_path(
-    b: str, c: int, a: Optional[Path] = None
+    file_data: FileData, b: str, c: int, a: Optional[Path] = None
 ) -> SampleFunctionWithPathResponse:
     s: list[Any] = [type(a).__name__, f"[exists: {a.exists()}]", a.resolve()] if a else []
     s.extend([b, c])
@@ -33,5 +36,10 @@ def sample_function_with_path(
         "resolved": a.resolve(),
         "b": b,
         "c": c,
+        "p": (
+            False
+            if isinstance(file_data, BatchFileData)
+            else file_data.source_identifiers.relative_path is not None
+        ),
     }
     return SampleFunctionWithPathResponse(**resp)
diff --git a/unstructured_platform_plugins/__version__.py b/unstructured_platform_plugins/__version__.py
@@ -1 +1 @@
-__version__ = "0.0.15"  # pragma: no cover
+__version__ = "0.0.16"  # pragma: no cover
diff --git a/unstructured_platform_plugins/etl_uvicorn/api_generator.py b/unstructured_platform_plugins/etl_uvicorn/api_generator.py
@@ -11,6 +11,7 @@
 from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
 from pydantic import BaseModel, Field, create_model
 from starlette.responses import RedirectResponse
+from unstructured_ingest.v2.interfaces.file_data import file_data_from_dict
 from uvicorn.config import LOG_LEVELS
 from uvicorn.importer import import_from_string
 
@@ -200,6 +201,11 @@ async def run_job(request: input_schema_model) -> ResponseType:
             log_func_and_body(func=func, body=request.json())
             # Create dictionary from pydantic model while preserving underlying types
             request_dict = {f: getattr(request, f) for f in request.model_fields}
+            # Make sure nested classes get instantiated correctly
+            if "file_data" in request_dict:
+                request_dict["file_data"] = file_data_from_dict(
+                    request_dict["file_data"].model_dump()
+                )
             map_inputs(func=func, raw_inputs=request_dict)
             if logger.level == LOG_LEVELS.get("trace", logging.NOTSET):
                 logger.log(level=logger.level, msg=f"passing inputs to function: {request_dict}")