Skip to content

Commit 01fb857

Browse files
authored
bugfix/FileData Literal edge case (#18)
1 parent 3f5722a commit 01fb857

File tree

10 files changed

+66
-37
lines changed

10 files changed

+66
-37
lines changed

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,13 @@
1+
## 0.0.2
2+
3+
### Enhancements
4+
5+
### Features
6+
7+
### Fixes
8+
9+
* **FileData Literal not handled** `FileData` was updated to use `Literal` rather than `Enum`, so support for `Literal` types was added to the JSON schema generation.
10+
111
## 0.0.1
212

313
### Enhancements

requirements/cli.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
uvicorn
22
fastapi
33
click
4-
unstructured
4+
unstructured-ingest

requirements/cli.txt

Lines changed: 33 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#
2-
# This file is autogenerated by pip-compile with Python 3.10
2+
# This file is autogenerated by pip-compile with Python 3.11
33
# by the following command:
44
#
5-
# pip-compile requirements//cli.in
5+
# pip-compile cli.in
66
#
77
annotated-types==0.7.0
88
# via pydantic
@@ -15,7 +15,7 @@ backoff==2.2.1
1515
# via unstructured
1616
beautifulsoup4==4.12.3
1717
# via unstructured
18-
certifi==2024.6.2
18+
certifi==2024.7.4
1919
# via
2020
# httpcore
2121
# httpx
@@ -29,7 +29,7 @@ charset-normalizer==3.3.2
2929
# unstructured-client
3030
click==8.1.7
3131
# via
32-
# -r requirements//cli.in
32+
# -r cli.in
3333
# nltk
3434
# typer
3535
# uvicorn
@@ -45,10 +45,8 @@ email-validator==2.2.0
4545
# via fastapi
4646
emoji==2.12.1
4747
# via unstructured
48-
exceptiongroup==1.2.1
49-
# via anyio
50-
fastapi==0.111.0
51-
# via -r requirements//cli.in
48+
fastapi==0.111.1
49+
# via -r cli.in
5250
fastapi-cli==0.0.4
5351
# via fastapi
5452
filetype==1.2.0
@@ -101,25 +99,32 @@ nest-asyncio==1.6.0
10199
nltk==3.8.1
102100
# via unstructured
103101
numpy==1.26.4
104-
# via unstructured
102+
# via
103+
# pandas
104+
# unstructured
105105
ordered-set==4.1.0
106106
# via deepdiff
107-
orjson==3.10.6
108-
# via fastapi
109107
packaging==24.1
110108
# via
111109
# marshmallow
112110
# unstructured-client
113-
pydantic==2.8.0
111+
pandas==2.2.2
112+
# via unstructured-ingest
113+
psutil==6.0.0
114+
# via unstructured
115+
pydantic==2.8.2
114116
# via fastapi
115-
pydantic-core==2.20.0
117+
pydantic-core==2.20.1
116118
# via pydantic
117119
pygments==2.18.0
118120
# via rich
119-
pypdf==4.2.0
121+
pypdf==4.3.1
120122
# via unstructured-client
121123
python-dateutil==2.9.0.post0
122-
# via unstructured-client
124+
# via
125+
# pandas
126+
# unstructured-client
127+
# unstructured-ingest
123128
python-dotenv==1.0.1
124129
# via uvicorn
125130
python-iso639==2024.4.27
@@ -128,11 +133,13 @@ python-magic==0.4.27
128133
# via unstructured
129134
python-multipart==0.0.9
130135
# via fastapi
136+
pytz==2024.1
137+
# via pandas
131138
pyyaml==6.0.1
132139
# via uvicorn
133-
rapidfuzz==3.9.4
140+
rapidfuzz==3.9.5
134141
# via unstructured
135-
regex==2024.5.15
142+
regex==2024.7.24
136143
# via nltk
137144
requests==2.32.3
138145
# via
@@ -168,34 +175,33 @@ typer==0.12.3
168175
# via fastapi-cli
169176
typing-extensions==4.12.2
170177
# via
171-
# anyio
172178
# emoji
173179
# fastapi
174180
# pydantic
175181
# pydantic-core
176-
# pypdf
177182
# typer
178183
# typing-inspect
179184
# unstructured
180185
# unstructured-client
181-
# uvicorn
182186
typing-inspect==0.9.0
183187
# via
184188
# dataclasses-json
185189
# unstructured-client
186-
ujson==5.10.0
187-
# via fastapi
188-
unstructured==0.14.9
189-
# via -r requirements//cli.in
190-
unstructured-client==0.23.8
190+
tzdata==2024.1
191+
# via pandas
192+
unstructured==0.15.0
193+
# via unstructured-ingest
194+
unstructured-client==0.25.0
191195
# via unstructured
196+
unstructured-ingest==0.0.0
197+
# via -r cli.in
192198
urllib3==2.2.2
193199
# via
194200
# requests
195201
# unstructured-client
196-
uvicorn[standard]==0.30.1
202+
uvicorn[standard]==0.30.3
197203
# via
198-
# -r requirements//cli.in
204+
# -r cli.in
199205
# fastapi
200206
uvloop==0.19.0
201207
# via uvicorn

requirements/constraints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
unstructured-ingest==0.0.0

test/test_schema.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import pytest
88
from pydantic import BaseModel
99
from typing_extensions import TypedDict
10-
from unstructured.ingest.v2.interfaces import FileData
10+
from unstructured_ingest.v2.interfaces import FileData
1111

1212
import unstructured_platform_plugins.schema.json_schema as js
1313
from unstructured_platform_plugins.etl_uvicorn.utils import get_input_schema
@@ -507,7 +507,7 @@ def fn(a: FileData) -> list[FileData]:
507507
],
508508
"default": None,
509509
},
510-
"doc_type": {"type": "string", "enum": ["batch", "file"], "default": "file"},
510+
"doc_type": {"type": "string", "default": "file"},
511511
"metadata": {
512512
"type": "object",
513513
"properties": {
@@ -599,7 +599,7 @@ def fn(a: FileData) -> list[FileData]:
599599
],
600600
"default": None,
601601
},
602-
"doc_type": {"type": "string", "enum": ["batch", "file"], "default": "file"},
602+
"doc_type": {"type": "string", "default": "file"},
603603
"metadata": {
604604
"type": "object",
605605
"properties": {

test/test_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import pytest
66
from pydantic import BaseModel
7-
from unstructured.ingest.v2.interfaces import FileData
7+
from unstructured_ingest.v2.interfaces import FileData
88
from uvicorn.importer import import_from_string
99

1010
from unstructured_platform_plugins.etl_uvicorn import utils
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.0.1" # pragma: no cover
1+
__version__ = "0.0.2" # pragma: no cover

unstructured_platform_plugins/etl_uvicorn/api_generator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,8 @@ async def run_job(request: input_schema_model) -> InvokeResponse:
122122

123123
else:
124124

125-
@fastapi_app.post("/invoke", response_model=response_type)
126-
async def run_job() -> response_type:
125+
@fastapi_app.post("/invoke", response_model=InvokeResponse)
126+
async def run_job() -> InvokeResponse:
127127
logger.debug(f"invoking function without inputs: {func}")
128128
return await wrap_fn(
129129
func=func,

unstructured_platform_plugins/etl_uvicorn/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import inspect
22
from dataclasses import is_dataclass
33
from enum import EnumMeta
4+
from inspect import Parameter
45
from types import GenericAlias, NoneType
56
from typing import Any, Callable, Optional
67

@@ -58,6 +59,10 @@ def get_input_schema(func: Callable, omit: Optional[list[str]] = None) -> dict:
5859
parameters = get_typed_parameters(func)
5960
if omit:
6061
parameters = [p for p in parameters if p.name not in omit]
62+
# Omit self if wrapping method
63+
parameters = [
64+
p for p in parameters if not (p.param_type is Parameter.empty and p.name == "self")
65+
]
6166
return parameters_to_json_schema(parameters)
6267

6368

unstructured_platform_plugins/schema/json_schema.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
from inspect import Parameter
55
from pathlib import Path
66
from types import GenericAlias, NoneType, UnionType
7-
from typing import Any, Optional, Type, Union, _UnionGenericAlias
7+
from typing import Any, Literal, Optional, Type, Union, _UnionGenericAlias
88

99
from pydantic import BaseModel, create_model
1010
from pydantic.fields import FieldInfo, PydanticUndefined
11-
from unstructured.ingest.v2.interfaces import FileData
11+
from unstructured_ingest.v2.interfaces import FileData
1212

1313
from unstructured_platform_plugins.schema.utils import TypedParameter
1414
from unstructured_platform_plugins.type_hints import get_type_hints
@@ -37,6 +37,13 @@ def is_typed_dict(val: Any) -> bool:
3737

3838

3939
def type_to_json_schema(t: Type, args: Optional[tuple[Any, ...]] = None) -> dict:
40+
if t is Literal:
41+
arg_types = [type(a) for a in args]
42+
if len(set(arg_types)) != 1:
43+
raise TypeError(
44+
"Literal must be of a single type. Defined with: {}".format(", ".join(arg_types))
45+
)
46+
t = arg_types[0]
4047
resp = {"type": types_map[t]}
4148
if t is list and args:
4249
list_type = args[0]

0 commit comments

Comments
 (0)